PEE/src/thirdeye.ts

240 lines
7.3 KiB
TypeScript
Raw Normal View History

2022-01-05 01:14:23 +00:00
import type { EmbeddedFile, ImageProcessor } from "./main";
2022-01-09 15:03:53 +00:00
import { localLoad, settings } from "./stores";
2022-01-12 04:18:50 +00:00
import { Buffer } from "buffer";
import {decode} from 'jpeg-js/lib/decoder';
2022-01-26 20:45:15 +00:00
import { bmvbhash_even } from "./phash";
2022-01-29 20:01:45 +00:00
import { ifetch, Platform } from "./platform";
2022-01-05 01:14:23 +00:00
2022-01-12 08:09:30 +00:00
/**
 * Most recent settings value published by the `settings` store.
 * Reassigned every time the store notifies this module's subscriber.
 */
export let csettings: Parameters<typeof settings['set']>[0];
settings.subscribe(latest => {
    csettings = latest;
});
2022-01-05 01:14:23 +00:00
/** A booru-style image board that can be asked for a file by its MD5 hex digest. */
export type Booru = {
    /** When true, the source is skipped during lookups. */
    disabled?: boolean;
    /** Display name; used as the title of the page link of an extracted embed. */
    name: string;
    /** Host name; lookups request `https://${domain}${endpoint}${md5}`. */
    domain: string;
    /** Path (and query prefix) to which the MD5 hex digest is appended. */
    endpoint: string;
    /** Converts this source's raw JSON response into normalized matches. */
    quirks: tran;
};
/** A single post returned by a booru, normalized to the fields this module uses. */
export type BooruMatch = {
    /** Source link reported by the booru for the post, if any. */
    source?: string;
    /** URL of the post's page on the booru. */
    page?: string;
    /** Tags of the post; checked against the user's blacklist. */
    tags: string[];
    /** URL of the full-size file. */
    full_url: string;
    /** URL of the preview/thumbnail image. */
    preview_url: string;
    /** File extension without the leading dot. */
    ext: string;
};
/** Normalizer that turns a booru's raw (untyped) JSON response into a list of matches. */
type tran = (a: any) => BooruMatch[];
/**
 * Resolves with the first fulfilled value among `promises` that satisfies `pred`.
 *
 * Values that fail the predicate are converted into rejections so that
 * `Promise.any` skips them. If no promise produces an accepted value, the
 * returned promise rejects with an `AggregateError`.
 *
 * @param promises candidates to race
 * @param pred acceptance test applied to each fulfilled value
 * @returns a promise for the first accepted value
 */
function firstThatFor<T>(promises: Promise<T>[], pred: (v: T) => boolean): Promise<T> {
    // The result must be returned; without it callers always received `undefined`.
    return Promise.any(promises.map(async p => {
        const v = await p;
        if (pred(v))
            return v;
        // Thrown as-is on purpose: it only serves to make Promise.any skip this entry.
        throw v;
    }));
}
2022-02-21 23:01:38 +00:00
/**
 * Builds the response normalizer for a booru JSON API.
 *
 * The post list is taken from `a.post`, `a.data`, or `a` itself; anything that
 * is not an array yields no matches. Posts without a `file_url` are dropped.
 *
 * @param prefix URL prefix to which the post id is appended to form the page link
 * @returns a function mapping a raw response to normalized matches
 */
const gelquirk: (s: string) => tran = prefix => (a => {
    // Tags arrive as `tag_string`, as a space-separated `tags` string, or as a
    // `tags` array of strings or of objects carrying `name_en`.
    const tagText = (e: any): string => {
        if (e.tag_string)
            return e.tag_string;
        if (Array.isArray(e.tags))
            return e.tags.map((t: any) => typeof t == "string" ? t : t?.name_en).join(' ');
        // Anything else (missing, or an unexpected type) counts as "no tags"
        // instead of blowing up on `.split`.
        return typeof e.tags == "string" ? e.tags : '';
    };

    const base = (a?.post || a?.data || a);
    if (!Array.isArray(base))
        return [];
    return base
        .filter((e: any) => e.file_url)
        .map((e: any) => ({
            full_url: e.file_url,
            preview_url: e.preview_url,
            source: e.source,
            // Fall back to whatever follows the last dot of the file URL.
            ext: e.file_ext || e.file_url.slice(e.file_url.lastIndexOf('.') + 1),
            page: `${prefix}${(e.id || e.parent_id)}`,
            tags: tagText(e).split(' ')
        } as BooruMatch));
});
2022-01-05 01:14:23 +00:00
2022-01-12 04:18:50 +00:00
// Module-level mirrors of individual settings, refreshed by the subscription below.
/** Mirrors the `expte` setting. */
let experimentalApi = false;
/** Tags from the `blacklist` setting; matches carrying any of them are discarded. */
let black = new Set<string>();
/** Mirrors the `phash` setting (perceptual-hash comparison in `has_embed`). */
let phashEn = false;
/** Hamming-distance threshold used by the perceptual-hash comparison. */
let mindist = 5;

/**
 * Configured booru sources, each paired with its response normalizer.
 *
 * Seeded from the persisted `settingsv2` entry and replaced whenever the
 * settings store publishes a new value. It is declared BEFORE the subscription
 * on purpose: the subscriber assigns to `boorus`, and if the store invokes
 * subscribers synchronously during `subscribe` (as Svelte stores do), assigning
 * to a `let` binding ahead of its declaration throws a ReferenceError.
 */
export let boorus: Booru[] =
    localLoad('settingsv2', { rsources: [] as (Omit<Booru, 'quirks'> & { view: string, disabled?: boolean })[] })
        .rsources.map(e => ({
            ...e,
            quirks: gelquirk(e.view)
        }));

settings.subscribe(s => {
    experimentalApi = s.expte;
    boorus = s.rsources.map(e => ({
        ...e,
        quirks: gelquirk(e.view)
    }));
    black = new Set(s.blacklist);
    // NOTE(review): `||` also maps an explicit 0 to the default of 5 - confirm that is intended.
    mindist = s.mdist || 5;
    phashEn = s.phash;
});
2022-01-05 01:14:23 +00:00
2022-01-12 04:18:50 +00:00
/** Delay, in milliseconds, during which lookups are collected before one batched request is sent. */
const bufferingTime = 2000;
/** Handle of the pending batch timer; `undefined` when no batch is scheduled. */
let expired: number | NodeJS.Timeout | undefined = undefined;
/** Matches grouped by MD5 hex digest, then by booru domain. */
type ApiResult = { [md5 in string]: { [domain in string]: BooruMatch[] } };
/** Lookups waiting for the next batch: MD5 hex digest plus the callback that receives its result. */
let reqQueue: [string, (a: ApiResult) => void][] = [];
/** Settles once the batch in flight (if any) has finished; new lookups wait on it before enqueueing. */
let unlockQueue = Promise.resolve();
/** Results of earlier batches, keyed by MD5 hex digest. */
const queryCache: ApiResult = {};

/**
 * Sends every queued digest that is not cached yet in a single POST request,
 * stores the answer in `queryCache`, then hands each queued callback the entry
 * for its digest and empties the queue.
 *
 * While it runs, `unlockQueue` is pending so that new lookups are held back.
 * A failed request or an unparsable response is logged rather than propagated:
 * the callbacks are still invoked (with whatever the cache holds, possibly
 * `undefined` for their digest) and the queue is always unlocked, so one bad
 * batch cannot stall every later lookup.
 */
const processQueries = async () => {
    let unlock!: () => void;
    unlockQueue = new Promise<void>(_ => unlock = _);
    const md5 = reqQueue.map(e => e[0]).filter(e => !(e in queryCache));
    expired = undefined;
    try {
        if (md5.length > 0) {
            const res = await fetch("https://shoujo.coom.tech/api", {
                method: "POST",
                body: JSON.stringify({ md5 }),
                headers: {
                    'content-type': 'application/json'
                }
            });
            // Keep error payloads out of the cache.
            if (!res.ok)
                throw new Error(`batched lookup failed with HTTP status ${res.status}`);
            const results: ApiResult = await res.json();
            Object.entries(results).forEach(e => queryCache[e[0]] = e[1]);
        }
    } catch (e) {
        console.error('batched lookup failed', e);
    } finally {
        reqQueue.forEach(e => e[1]({ [e[0]]: queryCache[e[0]] }));
        reqQueue = [];
        unlock();
    }
};

/**
 * Adds a lookup to the current batch, waiting first for any batch in flight to
 * finish, and schedules the batch to be sent after `bufferingTime` if none is
 * scheduled yet.
 *
 * @param hex MD5 hex digest to look up
 * @param cb receives the result once the batch has been processed
 */
const queueForProcessing = async (hex: string, cb: (a: ApiResult) => void) => {
    console.log("putting", hex, 'in queue');
    await unlockQueue;
    console.log("put", hex, 'in queue');
    reqQueue.push([hex, cb]);
    if (!expired) {
        expired = setTimeout(processQueries, bufferingTime);
    }
};
2022-01-05 01:14:23 +00:00
// Per-booru lookup cache: cache[domain][md5 hex] holds the matches already fetched for that pair.
const cache: any = {};
2022-01-12 04:18:50 +00:00
/**
 * Looks up `hex` through the batched lookup queue.
 * Resolves with the value the queue passes to the registered callback.
 */
const shoujoFind = async (hex: string): Promise<ApiResult> => {
    const answer = new Promise<ApiResult>(resolve => {
        queueForProcessing(hex, resolve);
    });
    return answer;
};
2022-01-08 18:01:01 +00:00
/**
 * Asks one booru for the posts matching an MD5 hex digest.
 *
 * Successful answers are cached per domain and digest. Matches carrying a
 * blacklisted tag are removed before caching. Any failure (network error,
 * invalid JSON, normalizer error) is logged and reported as "no matches";
 * failures are not cached.
 *
 * @param b booru to query
 * @param hex MD5 hex digest appended to the booru's endpoint
 * @param abort currently unused
 * @returns the (possibly empty) list of matches
 */
const findFileFrom = async (b: Booru, hex: string, abort?: EventTarget) => {
    try {
        /* if (experimentalApi) {
            const res = await shoujoFind(hex);
            if (!res)
                debugger;
            return hex in res ? (res[hex][b.domain] || []) : [];
        }*/
        if (b.domain in cache && hex in cache[b.domain])
            return cache[b.domain][hex] as BooruMatch[];
        const res = await ifetch(`https://${b.domain}${b.endpoint}${hex}`);
        // might throw because some endpoints respond with invalid json when an error occurs
        const pres = await res.json();
        const matches = b.quirks(pres).filter(m => !m.tags.some(tag => black.has(tag)));
        if (!(b.domain in cache))
            cache[b.domain] = {};
        cache[b.domain][hex] = matches;
        return matches;
    } catch (e) {
        console.error('The following error might be expected');
        console.error(e);
        return [];
    }
};
/**
 * Builds the embed for a file whose name starts with an MD5 hex digest by
 * querying the enabled boorus in order and using the first match of the first
 * booru that returns any.
 *
 * With the `hotlink` setting on, `thumbnail` and `data` are plain URLs.
 * Otherwise the thumbnail is downloaded immediately, and `data` is a function
 * that downloads the full file on first call and reuses the bytes afterwards.
 *
 * @param b file contents (not used by this processor)
 * @param fn file name; its first 32 characters are used as the digest
 * @returns a one-element array with the embed, or an empty array when there is
 *          no file name or no booru knows the digest
 *          NOTE(review): previously this case threw a TypeError on `result[0]`;
 *          confirm callers treat an empty array as "nothing to embed".
 */
const extract = async (b: Buffer, fn?: string) => {
    if (!fn)
        return [] as EmbeddedFile[];
    const md5 = fn.substring(0, 32);
    let match: BooruMatch | undefined;
    let booru = '';
    for (const e of Object.values(boorus)) {
        if (e.disabled)
            continue;
        const found = await findFileFrom(e, md5);
        if (found.length) {
            match = found[0];
            booru = e.name;
            break;
        }
    }
    // Every booru disabled, or none of them knows this digest.
    if (!match)
        return [] as EmbeddedFile[];

    let cachedFile: ArrayBuffer | undefined;
    const prev = match.preview_url;
    const full = match.full_url;
    return [{
        source: match.source,
        page: {
            title: booru,
            url: match.page
        },
        // 32 digest characters plus the dot, followed by the booru's extension.
        filename: fn.substring(0, 33) + match.ext,
        thumbnail: csettings.hotlink ? (prev || full) : Buffer.from(await (await ifetch(prev || full)).arrayBuffer()),
        data: csettings.hotlink ? (full || prev) : (async (lsn) => {
            if (!cachedFile)
                cachedFile = (await (await ifetch(full || prev, undefined, lsn)).arrayBuffer());
            return Buffer.from(cachedFile);
        })
    } as EmbeddedFile];
};
2022-01-26 20:45:15 +00:00
/**
 * Perceptual hash of an image: decodes `b` with the jpeg-js decoder and feeds
 * the decoded image to `bmvbhash_even` with a parameter of 8.
 * The result is compared elsewhere with `hammingDist`, which expects hex strings.
 */
const phash = (b: Buffer) => {
    const res = decode(b);
    return bmvbhash_even(res, 8);
};
/**
 * Number of bit positions in which two values differ.
 * Both `a` and `b` are hex strings (without a `0x` prefix).
 */
const hammingDist = (a: string, b: string) => {
    // XOR leaves a 1 exactly where the two values disagree.
    let diff = BigInt('0x' + a) ^ BigInt('0x' + b);
    let count = 0;
    // Each round clears the lowest set bit, so the loop runs once per differing bit.
    for (; diff != 0n; diff &= diff - 1n)
        count++;
    return count;
};
/**
 * Tells whether a booru knows the file whose name starts with an MD5 hex digest.
 *
 * The enabled boorus are queried in order; the first one returning at least one
 * match with a usable URL decides. When perceptual hashing is enabled and a
 * preview link is given, the hash of that preview is additionally compared
 * with the hash of the booru's preview, and the result is `true` only if their
 * Hamming distance exceeds `mindist`.
 *
 * @param b file contents
 * @param fn file name; its first 32 characters are used as the digest
 * @param prevlink URL of the preview of the posted file, used for the hash comparison
 * @returns `true` when an embed is available, `false` otherwise (always a boolean)
 */
const has_embed = async (b: Buffer, fn?: string, prevlink?: string) => {
    if (!fn)
        return false;
    // It's not worth to bother skipping images with filenames that match their md5 because
    // 4chan reencodes jpegs, which is well over half the files posted
    // ok fine you autists
    // NOTE(review): this compares the hex-decoded file name with the file's raw
    // bytes, not with a digest of them, so it presumably almost never matches -
    // confirm whether an MD5 of `b` was intended.
    if (Buffer.from(fn, 'hex').equals(b))
        return false;

    const md5 = fn.substring(0, 32);
    let result: BooruMatch[] = [];
    for (const e of Object.values(boorus)) {
        if (e.disabled)
            continue;
        const found = await findFileFrom(e, md5);
        result = found.filter(m => m.full_url || m.preview_url); // skips possible paywalls
        if (result.length)
            break;
    }
    if (result.length == 0)
        return false;

    if (phashEn && prevlink) {
        const getHash = async (l: string) => {
            const ogreq = await ifetch(l);
            const origPreview = await ogreq.arrayBuffer();
            return phash(Buffer.from(origPreview));
        };
        const [orighash, tehash] = await Promise.all([
            getHash(prevlink),
            // Same fallback as `extract`: a match may lack a preview URL.
            getHash(result[0].preview_url || result[0].full_url)
        ]);
        const d = hammingDist(orighash, tehash);
        console.log(d, prevlink);
        return d > mindist;
    }
    return true;
};
/**
 * Image processor that recovers files from boorus by the MD5 digest in the file name.
 * `match` accepts names made of 32 lowercase hex characters, a dot, and at
 * least three further characters.
 */
export default {
    // NOTE(review): the meaning of `skip` is defined by ImageProcessor's consumer - confirm there.
    skip: true,
    extract,
    has_embed,
    match: fn => !!fn.match(/^[0-9a-f]{32}\.....?/)
} as ImageProcessor;