Skip to content

Instantly share code, notes, and snippets.

@Sasquire
Last active November 27, 2021 12:13
Show Gist options
  • Save Sasquire/3f1fab5b4f32f2a89336cc3428cad6c5 to your computer and use it in GitHub Desktop.
Save Sasquire/3f1fab5b4f32f2a89336cc3428cad6c5 to your computer and use it in GitHub Desktop.

If you happen to have a large collection of images, tagged with artists, from another site, then it could be possible to search for the image on e621 and potentially apply the artist tag. The same holds for character tags, assuming artists and character owners post images they have made or commissioned on these sites. This fails if other people start uploading the same images, such as a specialized fetish-reposting account.

This repository contains some of the tools needed to do this matching against FurAffinity's site.

character_artist_tags_by_post.json is a file produced by the SQL query

-- Lists (post_id, tag_name, tag_type) rows for the artist and character tags
-- of e621 posts, taken from e621.change_history joined with e621.tags.
select post_id, tag_name, tag_type from (
	-- unnest(tags) yields one row per tag; row_number ranks each post's rows
	-- by change_seq, highest first.
	select unnest(tags) as tag_name, post_id, row_number() over (partition by post_id order by change_seq desc)
	from e621.change_history
) as tags_post_table
-- tag_type comes from e621.tags, matched on tag_name.
inner join e621.tags using (tag_name)
-- NOTE(review): presumably meant to keep every tag of the newest change per
-- post; this relies on row_number being assigned before unnest expands the
-- rows — confirm against the PostgreSQL version in use.
where row_number <= 1
	and (tag_type = 'artist' or tag_type = 'character')

and exported using DBeaver's JSON export.

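and sorted.txt.data has one match per line in the format e621:<e621_post_id> download-<is_sourced> with fa:<fa_post_id>\t<fa_artist_lower>, where <is_sourced> is "match" when the e621 post's sources already link to that FA submission and "*match" when they do not. As an example: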

e621:126668 download-*match with fa:5462583	---
e621:126715 download-*match with fa:2674522	---
e621:48519 download-*match with fa:5518496	---
e621:728700 download-match with fa:15176228	--skittles--
e621:957464 download-*match with fa:14315632	-araku-
e621:957467 download-*match with fa:14315610	-araku-
e621:1049673 download-*match with fa:14424392	-arctic-
e621:809835 download-*match with fa:15438704	-arctic-
e621:366456 download-match with fa:8148985	-aru-
e621:161585 download-match with fa:5418127	-ash
e621:165008 download-*match with fa:6349459	-ash
e621:401256 download-*match with fa:9794037	-ash
e621:276126 download-*match with fa:9374388	-axiom-

The sqlite3 database used in 0_create_post_pairs.js has the schema

-- Schema of the sqlite3 `artwork` table read by 0_create_post_pairs.js.
-- That script selects download_md5, full_md5, thumb_md5, post_id and
-- artist_lower from rows where is_error_page is 0.
CREATE TABLE artwork (
	filename INTEGER, 
	post_id INTEGER PRIMARY KEY, 
	is_error_page INTEGER, 
	download_md5 TEXT, 
	full_md5 TEXT, 
	thumb_md5 TEXT, 
	download_url TEXT, 
	full_url TEXT, 
	thumb_url TEXT, 
	avatar_url TEXT, 
	artist_lower TEXT, 
	artist_upper TEXT, 
	title TEXT, 
	description TEXT, 
	date_posted TEXT, 
	category TEXT, 
	theme TEXT, 
	species TEXT, 
	gender TEXT, 
	favorites INTEGER, 
	comments INTEGER, 
	views INTEGER, 
	width INTEGER, 
	height INTEGER, 
	keywords TEXT, 
	rating TEXT, 
	camera TEXT, 
	date_and_time TEXT, 
	aperture TEXT, 
	iso_speed TEXT, 
	focal_length TEXT, 
	flash TEXT, 
	exposure TEXT, 
	metering_mode TEXT, 
	light_source TEXT 
);

1_merge_files_and_guess.js is where the magic happens and the final output is produced.

const path = require('path');
const utils = // utils from the Furry-Database project;
const fs = require('fs');
const JSONStream = require('JSONStream');
const sqlite3 = require('sqlite3').verbose();
// Open the sqlite3 database file that sits next to this script.
const sqlite_db = new sqlite3.Database(path.join(__dirname, 'database.db'));

// Promise wrappers around sqlite3's callback API. Each takes a single SQL
// string, rejects with the error passed to the callback, and otherwise
// resolves with the callback's result.

/** Resolves with the row passed to the `get` callback. */
sqlite_db.promise_get = (sql) => new Promise((resolve, reject) => {
	sqlite_db.get(sql, (err, row) => (err ? reject(err) : resolve(row)));
});

/** Resolves with the rows passed to the `all` callback. */
sqlite_db.promise_all = (sql) => new Promise((resolve, reject) => {
	sqlite_db.all(sql, (err, rows) => (err ? reject(err) : resolve(rows)));
});

/**
 * Resolves with no value once the statement has run.
 * Always settles: any falsy `err` counts as success, so the promise can no
 * longer stay pending when `err` is falsy but not null/undefined.
 */
sqlite_db.promise_run = (sql) => new Promise((resolve, reject) => {
	sqlite_db.run(sql, (err) => (err ? reject(err) : resolve()));
});
/**
 * Print one match line in the format
 * `e621:<e621 id> <kind>-[*]match with fa:<fa id>\t<fa artist>`.
 *
 * The `*` marks a match whose e621 post does NOT list the FA submission among
 * its sources; a plain `match` means the post is already sourced.
 *
 * @param {string} kind - Which FA hash matched: 'download', 'full' or 'thumb'.
 * @param {object} e6_post - Row from the e621 query (uses post_id and sources).
 * @param {object} fa_post - Row from the artwork table (uses post_id and artist_lower).
 */
function report_match (kind, e6_post, fa_post) {
	const fa_id = String(fa_post.post_id);
	// Assumes `sources` is an array of URL strings — TODO confirm; a missing
	// value is treated as "no sources".
	// NOTE(review): this is a substring test, so id 123 is also found inside a
	// URL that contains 1234.
	const is_sourced = (e6_post.sources || [])
		.some(e => e.includes('furaffinity.net/view') && e.includes(fa_id));
	const marker = is_sourced ? '' : '*';
	console.log(`e621:${e6_post.post_id} ${kind}-${marker}match with fa:${fa_post.post_id}\t${fa_post.artist_lower}`);
}

/**
 * Load e621 file hashes and FA artwork hashes, then print a line for every FA
 * submission whose download, full or thumb md5 equals a known e621 md5.
 *
 * @returns {Promise<void>}
 */
async function main () {
	console.log('getting e6 data');
	const db_results = await utils.db.query_raw('select actual_md5, change_seq, post_id, sources from e621.files inner join e621.change_history using (given_md5)');

	// Index e621 rows by md5. A later row with the same md5 replaces an earlier
	// one. The null prototype keeps lookups from hitting inherited properties.
	const known_md5s = Object.create(null);
	for (const item of db_results) {
		known_md5s[item.actual_md5] = item;
	}

	// A NULL hash must never count as a match, so look it up as "not found".
	const lookup = (md5) => (md5 == null ? undefined : known_md5s[md5]);

	console.log('getting fa data');
	// Error pages are already excluded by the query itself.
	const sqlite_results = await sqlite_db.promise_all('select download_md5, full_md5, thumb_md5, post_id, artist_lower from artwork where is_error_page == 0;');

	console.log('working on data');
	for (const fa_post of sqlite_results) {
		const download_hit = lookup(fa_post.download_md5);
		if (download_hit !== undefined) {
			report_match('download', download_hit, fa_post);
		}

		// Skip the full-size hash when it is the same file as the download.
		const full_hit = lookup(fa_post.full_md5);
		if (fa_post.full_md5 !== fa_post.download_md5 && full_hit !== undefined) {
			report_match('full', full_hit, fa_post);
		}

		// Thumbnails use the same marker convention as the other two kinds
		// (previously the `*` was inverted for thumb matches).
		const thumb_hit = lookup(fa_post.thumb_md5);
		if (thumb_hit !== undefined) {
			report_match('thumb', thumb_hit, fa_post);
		}
	}
}
main();
const fs = require('fs');
// The export wraps the tag rows in a single-key object; take that key's value.
const [tags] = Object.values(require('./character_artist_tags_by_post.json'));

// Parse every non-empty line of sorted.txt.data, which looks like
// `e621:<id> <type>-[*]match with fa:<id>\t<artist>`, into a match record.
const matches = [];
for (const line of fs.readFileSync('./sorted.txt.data', 'utf8').split('\n')) {
	if (!line) {
		continue;
	}
	const fields = line.split(/\s+/g);
	const artist = fields[4];
	const e6_id = parseInt(fields[0].split(':')[1], 10);
	const fa_id = parseInt(fields[3].split(':')[1], 10);
	const match_word = fields[1];
	matches.push({
		artist,
		e6_id,
		fa_id,
		type: match_word.split('-')[0],
		// A `*` in the match word marks an unsourced match.
		sourced: !match_word.includes('*')
	});
}
/**
 * Collect, per FA artist, the e621 artist and character tags found on that
 * artist's matched posts, then print the result and write it to
 * ./final_character_name_guesses.json.
 *
 * Reads the module-level `tags` and `matches` arrays.
 *
 * @returns {Promise<void>}
 */
async function main () {
	// Group the tag rows by e621 post id.
	const tag_map = Object.create(null);
	for (const tag of tags) {
		if (tag_map[tag.post_id] === undefined) {
			tag_map[tag.post_id] = [];
		}
		tag_map[tag.post_id].push(tag);
	}

	// Keyed by FA username. The null prototype means a name such as
	// "constructor" cannot be mistaken for an existing entry.
	const fa_artists = Object.create(null);
	let counter = 0;
	for (const fa_post of matches) {
		const tags_for_this_post = tag_map[fa_post.e6_id] || [];
		if (fa_artists[fa_post.artist] === undefined) {
			fa_artists[fa_post.artist] = {
				fa_lower: fa_post.artist,
				artists: [],
				characters: [],
				posts: []
			};
		}
		const entry = fa_artists[fa_post.artist];

		for (const tag of tags_for_this_post) {
			if (tag.tag_type === 'character') {
				entry.characters.push(tag.tag_name);
			} else if (tag.tag_type === 'artist') {
				entry.artists.push(tag.tag_name);
			}
			// Recorded once per tag and de-duplicated below; a matched post
			// with no tag rows therefore never appears in `posts`.
			entry.posts.push(tag.post_id);
		}
		console.log(`${++counter}/${matches.length}`);
	}

	// Most frequent tags first.
	const by_count_desc = (a, b) => b[1] - a[1];
	const final_result = Object.values(fa_artists).map(e => ({
		fa_lower: e.fa_lower,
		artist_tags: count_instances(e.artists).sort(by_count_desc),
		character_tags: count_instances(e.characters).sort(by_count_desc),
		posts: [...new Set(e.posts)]
	}));
	console.log(final_result);
	fs.writeFileSync('./final_character_name_guesses.json', JSON.stringify(final_result));
}
/**
 * Count how often each value occurs in `arr`.
 *
 * @param {Iterable<string>} arr - Values to tally (used as object keys).
 * @returns {Array<[string, number]>} One `[value, count]` pair per distinct
 *   value, in `Object.entries` order.
 */
function count_instances (arr) {
	// A null-prototype object has no inherited keys, so values such as
	// "constructor" or "__proto__" are counted like any other string.
	const dict = Object.create(null);
	for (const i of arr) {
		dict[i] = (dict[i] || 0) + 1;
	}
	return Object.entries(dict);
}
// { post_id: 435, tag_name: 'paige_(paige)', tag_type: 'character' },
/* {
artist: '-ky-',
e6_id: 416031,
fa_id: 11834191,
type: 'download',
sourced: true
}, */
main();
This file has been truncated, but you can view the full file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment