Sasquire · November 27, 2021 12:13
diff --git a/#Guessing-Artists-On-E621-Posts.md b/#Guessing-Artists-On-E621-Posts.md
diff --git a/0_create_post_pairs.js b/0_create_post_pairs.js
 const path = require('path');
 const utils = // utils from the Furry-Database project;
 const fs = require('fs');
 const JSONStream = require('JSONStream');

 const sqlite3 = require('sqlite3').verbose();
 const sqlite_db = new sqlite3.Database(__dirname + '/database.db');

 // Promise wrappers around the callback-style sqlite3 methods. Each takes the
 // SQL text plus optional bind parameters, which are forwarded to sqlite3
 // unchanged, and rejects with the error handed to the callback.
 // promise_get resolves with the row passed to the `get` callback.
 sqlite_db.promise_get = (sql, ...params) => new Promise((res, rej) => sqlite_db.get(sql, ...params, (err, row) => err ? rej(err) : res(row)));
 // promise_all resolves with the rows passed to the `all` callback.
 sqlite_db.promise_all = (sql, ...params) => new Promise((res, rej) => sqlite_db.all(sql, ...params, (err, all) => err ? rej(err) : res(all)));
 // promise_run resolves with no value. Any falsy `err` now settles the promise;
 // the previous nested ternary left it pending unless `err` was null/undefined.
 sqlite_db.promise_run = (sql, ...params) => new Promise((res, rej) => sqlite_db.run(sql, ...params, (err) => err ? rej(err) : res()));

 /**
  * Tells whether any source URL of an e621 post is a FurAffinity submission
  * link that contains the given FurAffinity post id.
  *
  * @param {string[]|null|undefined} sources - source URLs stored for the e621 post.
  * @param {string|number} fa_post_id - FurAffinity submission id.
  * @returns {boolean} true when at least one source matches.
  */
 function is_sourced_from_fa (sources, fa_post_id) {
 	const id = String(fa_post_id);
 	// A post without a sources list is treated as unsourced instead of throwing.
 	return (sources || []).some(e => e.includes('furaffinity.net/view') && e.includes(id));
 }

 /**
  * Looks up the download, full and thumbnail md5 of every FurAffinity artwork
  * row among the e621 file md5s and prints one line per hit:
  *
  *   e621:<post_id> <type>-match with fa:<post_id><TAB><artist_lower>
  *
  * "<type>-*match" (with a star) is printed instead when none of the e621
  * post's sources links to that FurAffinity submission. All three hash types
  * use the same convention; the thumb branch used to have it inverted.
  *
  * Reads from `utils.db` and `sqlite_db` and writes only to stdout.
  *
  * @returns {Promise<void>}
  */
 async function main () {
 	console.log('getting e6 data');
 	const db_results = await utils.db.query_raw('select actual_md5, change_seq, post_id, sources from e621.files inner join e621.change_history using (given_md5)');
 	// Index the e621 rows by md5. When several rows share an md5 the last one wins.
 	const known_md5s = {};
 	for (const item of db_results) {
 		known_md5s[item.actual_md5] = item;
 	}
 	// Debug aid: fs.writeFileSync('./e6_data.json', JSON.stringify(known_md5s), 'utf8');

 	console.log('getting fa data');
 	// The query itself excludes error pages, so rows need no further filtering.
 	const sqlite_results = await sqlite_db.promise_all('select download_md5, full_md5, thumb_md5, post_id, artist_lower from artwork where is_error_page == 0;');

 	console.log('working on data');
 	for (const fa_post of sqlite_results) {
 		const candidates = [
 			['download', fa_post.download_md5],
 			// The full-size hash is skipped when it is the same file as the download.
 			['full', fa_post.full_md5 !== fa_post.download_md5 ? fa_post.full_md5 : undefined],
 			['thumb', fa_post.thumb_md5]
 		];

 		for (const [type, md5] of candidates) {
 			// Missing hashes can never match anything.
 			const this_post = md5 == null ? undefined : known_md5s[md5];
 			if (this_post === undefined) {
 				continue;
 			}

 			// The star marks a hash match that e621 does not source to this submission.
 			const star = is_sourced_from_fa(this_post.sources, fa_post.post_id) ? '' : '*';
 			console.log(`e621:${this_post.post_id} ${type}-${star}match with fa:${fa_post.post_id}\t${fa_post.artist_lower}`);
 		}
 	}
 }

 // Run the script. A failure is logged and reflected in the exit code instead
 // of being left as an unhandled promise rejection.
 main().catch((err) => {
 	console.error(err);
 	process.exitCode = 1;
 });
diff --git a/1_merge_files_and_guess.js b/1_merge_files_and_guess.js
 const fs = require('fs');
 const tags = Object.values(require('./character_artist_tags_by_post.json'))[0];

 // Parse ./sorted.txt.data into one record per non-empty line. A line is split
 // on whitespace into: "e621:<id>", "<type>-match" or "<type>-*match", a third
 // token that is ignored, "fa:<id>", and the artist name.
 const matches = [];
 for (const line of fs.readFileSync('./sorted.txt.data', 'utf8').split('\n')) {
 	if (!line) {
 		continue;
 	}

 	const [e6_token, kind_token, , fa_token, artist] = line.split(/\s+/g);
 	matches.push({
 		artist,
 		e6_id: parseInt(e6_token.split(':')[1], 10),
 		fa_id: parseInt(fa_token.split(':')[1], 10),
 		type: kind_token.split('-')[0],
 		// A star in the second token marks an unsourced match.
 		sourced: !kind_token.includes('*')
 	});
 }

 /**
  * Groups the parsed matches by FurAffinity artist and tallies which e621
  * artist and character tags occur on that artist's matched posts.
  *
  * Reads the module-level `tags` and `matches`, logs a progress counter and the
  * final result, and writes the result as JSON to
  * ./final_character_name_guesses.json. Each output element has the shape
  * { fa_lower, artist_tags, character_tags, posts }, where the tag lists are
  * [tag_name, count] pairs sorted by descending count.
  *
  * @returns {Promise<void>}
  */
 async function main () {
 	// Both dictionaries are keyed by data (post ids, artist names), so they get
 	// no prototype: a name such as "constructor" must behave like any other key.
 	const tag_map = Object.create(null);
 	for (const tag of tags) {
 		if (tag_map[tag.post_id] === undefined) {
 			tag_map[tag.post_id] = [];
 		}

 		tag_map[tag.post_id].push(tag);
 	}

 	const fa_artists = Object.create(null);
 	let counter = 0;
 	for (const fa_post of matches) {
 		const tags_for_this_post = tag_map[fa_post.e6_id] || [];
 		if (fa_artists[fa_post.artist] === undefined) {
 			fa_artists[fa_post.artist] = {
 				fa_lower: fa_post.artist,
 				artists: [],
 				characters: [],
 				posts: new Set()
 			};
 		}
 		const entry = fa_artists[fa_post.artist];

 		for (const tag of tags_for_this_post) {
 			if (tag.tag_type === 'character') {
 				entry.characters.push(tag.tag_name);
 			} else if (tag.tag_type === 'artist') {
 				entry.artists.push(tag.tag_name);
 			}
 			// A post is recorded only when it has at least one tag row.
 			entry.posts.add(tag.post_id);
 		}

 		console.log(`${++counter}/${matches.length}`);
 	}

 	const by_count_desc = (a, b) => b[1] - a[1];
 	const final_result = Object.values(fa_artists).map(e => ({
 		fa_lower: e.fa_lower,
 		artist_tags: count_instances(e.artists).sort(by_count_desc),
 		character_tags: count_instances(e.characters).sort(by_count_desc),
 		posts: [...e.posts]
 	}));
 	console.log(final_result);
 	fs.writeFileSync('./final_character_name_guesses.json', JSON.stringify(final_result));
 }

 /**
  * Counts how often each value occurs in `arr`.
  *
  * @param {Iterable<string>} arr - values to tally. Each value is used as an
  *   object key, so non-string values are coerced to strings.
  * @returns {Array<[string, number]>} [value, count] pairs in Object.entries order.
  */
 function count_instances (arr) {
 	// No prototype: values such as "constructor" or "__proto__" are counted
 	// like any other key instead of colliding with inherited properties.
 	const dict = Object.create(null);
 	for (const i of arr) {
 		dict[i] = (dict[i] || 0) + 1;
 	}
 	return Object.entries(dict);
 }

 // Example element of `tags`:
 // { post_id: 435, tag_name: 'paige_(paige)', tag_type: 'character' },
 /* Example element of `matches`:
 	{
 	artist: '-ky-',
 	e6_id: 416031,
 	fa_id: 11834191,
 	type: 'download',
 	sourced: true
 	}, */

 // Run the script. A failure is logged and reflected in the exit code instead
 // of being left as an unhandled promise rejection.
 main().catch((err) => {
 	console.error(err);
 	process.exitCode = 1;
 });
diff --git a/9_Output.json b/9_Output.json
	const path = require('path');
	const utils = // utils from the Furry-Database project;
	const fs = require('fs');
	const JSONStream = require('JSONStream');

	const sqlite3 = require('sqlite3').verbose();
	const sqlite_db = new sqlite3.Database(__dirname + '/database.db');

	// Promise wrappers around the callback-style sqlite3 methods. Each takes the
	// SQL text plus optional bind parameters, which are forwarded to sqlite3
	// unchanged, and rejects with the error handed to the callback.
	// promise_get resolves with the row passed to the `get` callback.
	sqlite_db.promise_get = (sql, ...params) => new Promise((res, rej) => sqlite_db.get(sql, ...params, (err, row) => err ? rej(err) : res(row)));
	// promise_all resolves with the rows passed to the `all` callback.
	sqlite_db.promise_all = (sql, ...params) => new Promise((res, rej) => sqlite_db.all(sql, ...params, (err, all) => err ? rej(err) : res(all)));
	// promise_run resolves with no value. Any falsy `err` now settles the promise;
	// the previous nested ternary left it pending unless `err` was null/undefined.
	sqlite_db.promise_run = (sql, ...params) => new Promise((res, rej) => sqlite_db.run(sql, ...params, (err) => err ? rej(err) : res()));

	/**
	 * Tells whether any source URL of an e621 post is a FurAffinity submission
	 * link that contains the given FurAffinity post id.
	 *
	 * @param {string[]|null|undefined} sources - source URLs stored for the e621 post.
	 * @param {string|number} fa_post_id - FurAffinity submission id.
	 * @returns {boolean} true when at least one source matches.
	 */
	function is_sourced_from_fa (sources, fa_post_id) {
		const id = String(fa_post_id);
		// A post without a sources list is treated as unsourced instead of throwing.
		return (sources || []).some(e => e.includes('furaffinity.net/view') && e.includes(id));
	}

	/**
	 * Looks up the download, full and thumbnail md5 of every FurAffinity artwork
	 * row among the e621 file md5s and prints one line per hit:
	 *
	 *   e621:<post_id> <type>-match with fa:<post_id><TAB><artist_lower>
	 *
	 * "<type>-*match" (with a star) is printed instead when none of the e621
	 * post's sources links to that FurAffinity submission. All three hash types
	 * use the same convention; the thumb branch used to have it inverted.
	 *
	 * Reads from `utils.db` and `sqlite_db` and writes only to stdout.
	 *
	 * @returns {Promise<void>}
	 */
	async function main () {
		console.log('getting e6 data');
		const db_results = await utils.db.query_raw('select actual_md5, change_seq, post_id, sources from e621.files inner join e621.change_history using (given_md5)');
		// Index the e621 rows by md5. When several rows share an md5 the last one wins.
		const known_md5s = {};
		for (const item of db_results) {
			known_md5s[item.actual_md5] = item;
		}
		// Debug aid: fs.writeFileSync('./e6_data.json', JSON.stringify(known_md5s), 'utf8');

		console.log('getting fa data');
		// The query itself excludes error pages, so rows need no further filtering.
		const sqlite_results = await sqlite_db.promise_all('select download_md5, full_md5, thumb_md5, post_id, artist_lower from artwork where is_error_page == 0;');

		console.log('working on data');
		for (const fa_post of sqlite_results) {
			const candidates = [
				['download', fa_post.download_md5],
				// The full-size hash is skipped when it is the same file as the download.
				['full', fa_post.full_md5 !== fa_post.download_md5 ? fa_post.full_md5 : undefined],
				['thumb', fa_post.thumb_md5]
			];

			for (const [type, md5] of candidates) {
				// Missing hashes can never match anything.
				const this_post = md5 == null ? undefined : known_md5s[md5];
				if (this_post === undefined) {
					continue;
				}

				// The star marks a hash match that e621 does not source to this submission.
				const star = is_sourced_from_fa(this_post.sources, fa_post.post_id) ? '' : '*';
				console.log(`e621:${this_post.post_id} ${type}-${star}match with fa:${fa_post.post_id}\t${fa_post.artist_lower}`);
			}
		}
	}

	// Run the script. A failure is logged and reflected in the exit code instead
	// of being left as an unhandled promise rejection.
	main().catch((err) => {
		console.error(err);
		process.exitCode = 1;
	});
	const fs = require('fs');
	const tags = Object.values(require('./character_artist_tags_by_post.json'))[0];

	// Parse ./sorted.txt.data into one record per non-empty line. A line is split
	// on whitespace into: "e621:<id>", "<type>-match" or "<type>-*match", a third
	// token that is ignored, "fa:<id>", and the artist name.
	const matches = [];
	for (const line of fs.readFileSync('./sorted.txt.data', 'utf8').split('\n')) {
		if (!line) {
			continue;
		}

		const [e6_token, kind_token, , fa_token, artist] = line.split(/\s+/g);
		matches.push({
			artist,
			e6_id: parseInt(e6_token.split(':')[1], 10),
			fa_id: parseInt(fa_token.split(':')[1], 10),
			type: kind_token.split('-')[0],
			// A star in the second token marks an unsourced match.
			sourced: !kind_token.includes('*')
		});
	}

	/**
	 * Groups the parsed matches by FurAffinity artist and tallies which e621
	 * artist and character tags occur on that artist's matched posts.
	 *
	 * Reads the module-level `tags` and `matches`, logs a progress counter and the
	 * final result, and writes the result as JSON to
	 * ./final_character_name_guesses.json. Each output element has the shape
	 * { fa_lower, artist_tags, character_tags, posts }, where the tag lists are
	 * [tag_name, count] pairs sorted by descending count.
	 *
	 * @returns {Promise<void>}
	 */
	async function main () {
		// Both dictionaries are keyed by data (post ids, artist names), so they get
		// no prototype: a name such as "constructor" must behave like any other key.
		const tag_map = Object.create(null);
		for (const tag of tags) {
			if (tag_map[tag.post_id] === undefined) {
				tag_map[tag.post_id] = [];
			}

			tag_map[tag.post_id].push(tag);
		}

		const fa_artists = Object.create(null);
		let counter = 0;
		for (const fa_post of matches) {
			// Posts without any tag rows fall back to an empty list.
			const tags_for_this_post = tag_map[fa_post.e6_id] || [];
			if (fa_artists[fa_post.artist] === undefined) {
				fa_artists[fa_post.artist] = {
					fa_lower: fa_post.artist,
					artists: [],
					characters: [],
					posts: new Set()
				};
			}
			const entry = fa_artists[fa_post.artist];

			for (const tag of tags_for_this_post) {
				if (tag.tag_type === 'character') {
					entry.characters.push(tag.tag_name);
				} else if (tag.tag_type === 'artist') {
					entry.artists.push(tag.tag_name);
				}
				// A post is recorded only when it has at least one tag row.
				entry.posts.add(tag.post_id);
			}

			console.log(`${++counter}/${matches.length}`);
		}

		const by_count_desc = (a, b) => b[1] - a[1];
		const final_result = Object.values(fa_artists).map(e => ({
			fa_lower: e.fa_lower,
			artist_tags: count_instances(e.artists).sort(by_count_desc),
			character_tags: count_instances(e.characters).sort(by_count_desc),
			posts: [...e.posts]
		}));
		console.log(final_result);
		fs.writeFileSync('./final_character_name_guesses.json', JSON.stringify(final_result));
	}

	/**
	 * Counts how often each value occurs in `arr`.
	 *
	 * @param {Iterable<string>} arr - values to tally. Each value is used as an
	 *   object key, so non-string values are coerced to strings.
	 * @returns {Array<[string, number]>} [value, count] pairs in Object.entries order.
	 */
	function count_instances (arr) {
		// No prototype: values such as "constructor" or "__proto__" are counted
		// like any other key instead of colliding with inherited properties.
		const dict = Object.create(null);
		for (const i of arr) {
			dict[i] = (dict[i] || 0) + 1;
		}
		return Object.entries(dict);
	}

	// Example element of `tags`:
	// { post_id: 435, tag_name: 'paige_(paige)', tag_type: 'character' },
	/* Example element of `matches`:
	{
	artist: '-ky-',
	e6_id: 416031,
	fa_id: 11834191,
	type: 'download',
	sourced: true
	}, */

	// Run the script. A failure is logged and reflected in the exit code instead
	// of being left as an unhandled promise rejection.
	main().catch((err) => {
		console.error(err);
		process.exitCode = 1;
	});