2022-01-05 01:14:23 +00:00
|
|
|
import type { EmbeddedFile, ImageProcessor } from "./main";
|
2022-01-09 15:03:53 +00:00
|
|
|
import { localLoad, settings } from "./stores";
|
2022-01-12 04:18:50 +00:00
|
|
|
import { Buffer } from "buffer";
|
2022-01-26 20:45:15 +00:00
|
|
|
import jpeg from 'jpeg-js';
|
|
|
|
import { bmvbhash_even } from "./phash";
|
2022-01-29 20:01:45 +00:00
|
|
|
import { ifetch, Platform } from "./platform";
|
2022-01-05 01:14:23 +00:00
|
|
|
|
2022-01-12 08:09:30 +00:00
|
|
|
/**
 * Latest settings snapshot.
 * Reassigned every time the `settings` store calls its subscribers.
 */
export let csettings: Parameters<typeof settings['set']>[0];

settings.subscribe(function (latest) {
    csettings = latest;
});
|
|
|
|
|
2022-01-05 01:14:23 +00:00
|
|
|
/** Description of one booru site that can be asked about a file hash. */
export type Booru = {
    /** Name of the site; `extract` uses it as the title of the source page link. */
    name: string;
    /** Host name; lookups request `https://${domain}${endpoint}${hash}`. */
    domain: string;
    /** Path/query prefix placed right after the domain and right before the hash. */
    endpoint: string;
    /** Turns the parsed JSON answer of the site into a list of matches. */
    quirks: tran;
    /** When truthy, `extract` and `has_embed` skip this site. */
    disabled?: boolean;
};
|
|
|
|
|
|
|
|
/** One post reported by a booru for a looked-up hash. */
export type BooruMatch = {
    /** URL of the full-size file. */
    full_url: string;
    /** URL of the preview (thumbnail) image. */
    preview_url: string;
    /** File extension without the dot; `extract` appends it to the hash-based file name. */
    ext: string;
    /** Tags of the post; `findFileFrom` drops matches carrying a blacklisted tag. */
    tags: string[];
    /** Source reported by the booru for the post, if any. */
    source?: string;
    /** URL of the post page on the booru, if any. */
    page?: string;
};
|
|
|
|
|
|
|
|
/**
 * Signature of a response translator (the `quirks` field of a `Booru`):
 * it receives the parsed JSON answer of a site and returns the matches found in it.
 */
type tran = (a: any) => BooruMatch[];
|
|
|
|
|
2022-01-08 07:29:19 +00:00
|
|
|
/**
 * Resolves with the first value, among the given promises, that satisfies `pred`.
 *
 * Values that fail the predicate (and promises that reject) are treated as
 * rejections, so `Promise.any` keeps waiting for another candidate.
 *
 * @param promises candidates to race
 * @param pred     predicate a resolved value must satisfy to win
 * @returns a promise for the first accepted value; it rejects with an
 *          `AggregateError` (the `Promise.any` behaviour) when no candidate is accepted
 */
function firstThatFor<T>(promises: Promise<T>[], pred: (v: T) => boolean): Promise<T> {
    // The original dropped this promise, so callers always received `undefined`.
    return Promise.any(promises.map(async candidate => {
        const value = await candidate;
        if (pred(value))
            return value;
        // Rejecting with the refused value makes Promise.any move on to the next candidate.
        throw value;
    }));
}
|
|
|
|
|
2022-02-21 23:01:38 +00:00
|
|
|
/**
 * Builds the tag list of a raw post.
 * Accepts a space separated `tag_string`, a space separated `tags` string,
 * an array of tag strings, or an array of objects exposing `name_en`.
 * Empty tokens are dropped, so a post without tags yields `[]`.
 */
const gelquirkTags = (post: any): string[] => {
    let raw: unknown = post.tag_string || post.tags || '';
    if (Array.isArray(raw))
        raw = raw.map(t => (typeof t === 'string' ? t : t?.name_en)).join(' ');
    return typeof raw === 'string' ? raw.split(' ').filter(Boolean) : [];
};

/**
 * Creates the response translator shared by the supported boorus.
 *
 * @param prefix URL prefix of a post page; the post id is appended to it
 * @returns a translator that reads the post list from `a.post`, `a.data` or `a`
 *          itself, ignores posts without a `file_url`, and maps the rest to
 *          `BooruMatch` objects. Anything that is not a list yields `[]`.
 */
const gelquirk: (s: string) => tran = prefix => (a => {
    // Optional chaining keeps a null/undefined payload from throwing.
    const base = a?.post || a?.data || a;
    if (!Array.isArray(base))
        return [];
    return base
        .filter((e: any) => e && e.file_url)
        .map((e: any) => ({
            full_url: e.file_url,
            // The original fallback repeated `preview_url`; `preview_file_url` is
            // presumably what was meant (Danbooru-style field, like `tag_string`
            // and `file_ext` used below) - TODO confirm against the configured sites.
            preview_url: e.preview_url || e.preview_file_url,
            source: e.source,
            // Without an explicit extension, take whatever follows the last dot of the URL.
            ext: e.file_ext || e.file_url.slice(e.file_url.lastIndexOf('.') + 1),
            page: `${prefix}${(e.id || e.parent_id)}`,
            tags: gelquirkTags(e)
        } as BooruMatch));
});
|
2022-01-05 01:14:23 +00:00
|
|
|
|
2022-01-12 04:18:50 +00:00
|
|
|
// Module-level mirrors of individual settings, refreshed by the subscription below.
let experimentalApi = false;
// Tags that disqualify a match.
let black = new Set<string>();
// Whether has_embed compares perceptual hashes of the previews.
let phashEn = false;
// Hamming distance threshold used by has_embed when phashEn is set.
let mindist = 5;

/**
 * Boorus that lookups iterate over, each paired with its response translator.
 *
 * Declared BEFORE the subscription below: the subscriber assigns `boorus`, and
 * a store that notifies synchronously on subscribe (NOTE(review): presumably the
 * case for `settings` - confirm in ./stores) would otherwise write to the
 * binding before its `let` declaration has run.
 */
export let boorus: Booru[] =
    localLoad('settingsv2', { rsources: [] as (Omit<Booru, 'quirks'> & { view: string, disabled?: boolean })[] })
        .rsources.map(e => ({
            ...e,
            quirks: gelquirk(e.view)
        }));

// Keep the mirrors and the booru list in sync with the settings store.
settings.subscribe(s => {
    experimentalApi = s.expte;
    boorus = s.rsources.map(e => ({
        ...e,
        quirks: gelquirk(e.view)
    }));
    black = new Set(s.blacklist);
    // A missing (or zero) distance falls back to 5.
    mindist = s.mdist || 5;
    phashEn = s.phash;
});
|
2022-01-05 01:14:23 +00:00
|
|
|
|
2022-01-12 04:18:50 +00:00
|
|
|
/** Answer of the batch API: matches grouped by md5, then by booru domain. */
type ApiResult = Record<string, Record<string, BooruMatch[]>>;

/** Delay, in milliseconds, between the first queued request and the batch being sent. */
const bufferingTime = 2000;

/** Handle of the pending batch timer; `undefined` while no batch is scheduled. */
let expired: number | undefined;

/** Requests waiting for the next batch: the hash and the callback to answer. */
let reqQueue: Array<[string, (a: ApiResult) => void]> = [];

/** Awaited before enqueueing; replaced by a pending promise while a batch is processed. */
let unlockQueue = Promise.resolve();

/** Answers already received from the batch API, keyed by md5. */
const queryCache: ApiResult = {};
|
|
|
|
/**
 * Sends every queued hash that is not cached yet to the batch API in a single
 * request, stores the answer in `queryCache`, then answers and clears the queue.
 *
 * While it runs, `unlockQueue` is a pending promise so that new requests wait
 * for the next batch. The queue is always answered and unlocked, even when the
 * request or the JSON decoding fails; the original left `unlockQueue` pending
 * forever in that case, blocking every later request.
 *
 * Each callback receives `{ [hash]: matchesByDomain }`, or `{}` when nothing is
 * known about its hash.
 */
const processQueries = async () => {
    let unlock!: () => void;
    unlockQueue = new Promise<void>(resolve => unlock = resolve);
    // The timer that triggered this run is spent; allow a new one to be scheduled.
    expired = undefined;
    try {
        const md5 = reqQueue.map(e => e[0]).filter(e => !(e in queryCache));
        if (md5.length > 0) {
            const res = await fetch("https://shoujo.coom.tech/api", {
                method: "POST",
                body: JSON.stringify({ md5 }),
                headers: {
                    'content-type': 'application/json'
                }
            });
            const results: ApiResult = await res.json();
            Object.entries(results).forEach(e => queryCache[e[0]] = e[1]);
        }
    } catch (e) {
        // Best effort: the waiting callers are still answered from the cache below.
        console.error(e);
    } finally {
        const waiting = reqQueue;
        reqQueue = [];
        // Unlock first so a throwing callback cannot leave the queue locked.
        unlock();
        waiting.forEach(([hex, cb]) => cb(hex in queryCache ? { [hex]: queryCache[hex] } : {}));
    }
};
|
|
|
|
|
|
|
|
/**
 * Adds a hash to the pending batch and makes sure a batch run is scheduled.
 *
 * @param hex hash to look up
 * @param cb  called with the answer once the batch has been processed
 */
const queueForProcessing = async (hex: string, cb: (a: ApiResult) => void) => {
    console.log("putting", hex, 'in queue');
    // Wait until a batch that is currently being processed has finished.
    await unlockQueue;
    console.log("put", hex, 'in queue');
    reqQueue.push([hex, cb]);
    if (expired)
        return; // a batch run is already scheduled
    expired = setTimeout(processQueries, bufferingTime);
};
|
|
|
|
|
2022-01-05 01:14:23 +00:00
|
|
|
// Lookup results memoised by findFileFrom, indexed as cache[domain][hex].
const cache: any = {};
|
|
|
|
|
2022-01-12 04:18:50 +00:00
|
|
|
/**
 * Promise flavoured wrapper around `queueForProcessing`.
 *
 * @param hex hash to look up through the batch API
 * @returns the answer handed to the queue callback
 */
const shoujoFind = async (hex: string): Promise<ApiResult> => {
    const answer = new Promise<ApiResult>(resolve => {
        queueForProcessing(hex, resolve);
    });
    return answer;
};
|
|
|
|
|
2022-01-08 18:01:01 +00:00
|
|
|
/**
 * Asks one booru for the posts matching a hash.
 *
 * The raw (unfiltered) answer is memoised in `cache[domain][hex]`, and the tag
 * blacklist is applied on every call. The original cached the already filtered
 * list, so a blacklist change was ignored for hashes that had been looked up.
 *
 * @param b     booru to query
 * @param hex   hash appended to the booru endpoint
 * @param abort currently unused
 * @returns the matches without blacklisted tags; `[]` when the request or the
 *          decoding of the answer fails (failures are logged, not cached)
 */
const findFileFrom = async (b: Booru, hex: string, abort?: EventTarget) => {
    const allowed = (matches: BooruMatch[]) =>
        matches.filter(m => !m.tags.some(t => black.has(t)));
    try {
        /* if (experimentalApi) {
            const res = await shoujoFind(hex);
            if (!res)
                debugger;
            return hex in res ? (res[hex][b.domain] || []) : [];
        }*/
        if (b.domain in cache && hex in cache[b.domain])
            return allowed(cache[b.domain][hex] as BooruMatch[]);
        const res = await ifetch(`https://${b.domain}${b.endpoint}${hex}`);
        // might throw because some endpoint respond with invalid json when an error occurs
        const pres = await res.json();
        const matches = b.quirks(pres);
        if (!(b.domain in cache))
            cache[b.domain] = {};
        cache[b.domain][hex] = matches;
        return allowed(matches);
    } catch (e) {
        console.error('The following error might be expected');
        console.error(e);
        return [];
    }
};
|
|
|
|
|
|
|
|
/**
 * Builds the embedded file description for a file whose name starts with a hash.
 *
 * Enabled boorus are queried in order with the first 32 characters of `fn`;
 * the first match of the first booru that answers is used.
 *
 * @param b  unused by this processor
 * @param fn file name; its first 32 characters are used as the hash
 * @returns a one element list describing the match, or `[]` when `fn` is
 *          missing or no enabled booru knows the hash (the original threw a
 *          TypeError on `result[0]` in that case)
 */
const extract = async (b: Buffer, fn?: string): Promise<EmbeddedFile[]> => {
    if (!fn)
        return [];
    const md5 = fn.substring(0, 32);
    let result: BooruMatch[] = [];
    let booru = '';
    for (const e of boorus) {
        if (e.disabled)
            continue;
        result = await findFileFrom(e, md5);
        if (result.length) {
            booru = e.name;
            break;
        }
    }
    if (!result.length)
        return [];

    const match = result[0];
    const prev = match.preview_url;
    const full = match.full_url;
    // Filled by the first call of `data`, reused by later calls.
    let cachedFile: ArrayBuffer | undefined;
    return [{
        source: match.source,
        page: {
            title: booru,
            url: match.page
        },
        // 33 characters: the 32 character hash plus the dot that follows it.
        filename: fn.substring(0, 33) + match.ext,
        // With hotlinking the URL itself is handed over, otherwise the downloaded bytes.
        thumbnail: csettings.hotlink ? (prev || full) : Buffer.from(await (await ifetch(prev || full)).arrayBuffer()),
        data: csettings.hotlink ? (full || prev) : (async (lsn) => {
            if (!cachedFile)
                cachedFile = (await (await ifetch(full || prev, undefined, lsn)).arrayBuffer());
            return Buffer.from(cachedFile);
        })
    } as EmbeddedFile];
};
|
|
|
|
|
2022-01-26 20:45:15 +00:00
|
|
|
/**
 * Perceptual hash of a JPEG image.
 *
 * @param b JPEG encoded image
 * @returns the result of `bmvbhash_even` on the decoded image with 8 as its second argument
 */
const phash = (b: Buffer) => bmvbhash_even(jpeg.decode(b), 8);
|
|
|
|
|
|
|
|
/**
 * Number of bits that differ between two numbers written as hex strings.
 *
 * @param a hex digits, without `0x` prefix
 * @param b hex digits, without `0x` prefix
 * @returns count of set bits in `a XOR b`
 */
const hammingDist = (a: string, b: string) => {
    let diff = BigInt('0x' + a) ^ BigInt('0x' + b);
    let bits = 0;
    // Each iteration clears the lowest set bit, so the loop runs once per differing bit.
    for (; diff !== 0n; diff &= diff - 1n)
        bits++;
    return bits;
};
|
|
|
|
|
|
|
|
/**
 * Tells whether a booru knows the file whose name starts with a hash.
 *
 * @param b        compared against the hex-decoded file name (see below)
 * @param fn       file name; its first 32 characters are used as the hash
 * @param prevlink URL of the preview of the posted file; only used when
 *                 perceptual hashing is enabled
 * @returns `false` when `fn` is missing, when the hex-decoded file name equals
 *          `b`, or when no enabled booru returns a usable match. Otherwise
 *          `true`, unless perceptual hashing is enabled and `prevlink` is given:
 *          then the result is whether the two previews differ by more than
 *          `mindist` bits.
 */
const has_embed = async (b: Buffer, fn?: string, prevlink?: string): Promise<boolean> => {
    // The parameter is optional; the original crashed on `fn!` when it was omitted.
    if (!fn)
        return false;

    // Skip files whose name already matches `b` (their md5, per the original comment).
    if (Buffer.from(fn, 'hex').equals(b))
        return false;

    const md5 = fn.substring(0, 32);
    let result: BooruMatch[] = [];
    for (const e of boorus) {
        if (e.disabled)
            continue;
        result = (await findFileFrom(e, md5))
            .filter(m => m.full_url || m.preview_url); // skips possible paywalls
        if (result.length)
            break;
    }
    // Always a boolean: the original returned `undefined` when no booru was enabled.
    if (!result.length)
        return false;

    if (phashEn && prevlink) {
        const getHash = async (l: string) => {
            const ogreq = await ifetch(l);
            const origPreview = await ogreq.arrayBuffer();
            return phash(Buffer.from(origPreview));
        };
        const [orighash, tehash] = await Promise.all([
            getHash(prevlink),
            getHash(result[0].preview_url)
        ]);
        const d = hammingDist(orighash, tehash);
        console.log(d, prevlink);
        return d > mindist;
    }

    return true;
};
|
|
|
|
|
|
|
|
/**
 * Image processor that resolves hash-named files through the configured boorus.
 * `match` accepts names made of 32 lowercase hex digits, a dot and at least
 * three more characters.
 */
export default {
    // NOTE(review): the meaning of `skip` is defined by ImageProcessor in ./main - confirm there.
    skip: true,
    extract,
    has_embed,
    match: fn => /^[0-9a-f]{32}\.....?/.test(fn)
} as ImageProcessor;
|