|
|
@ -1,9 +1,8 @@ |
|
|
|
import * as tf from '@tensorflow/tfjs'; |
|
|
|
import { setWasmPaths } from '@tensorflow/tfjs-backend-wasm'; |
|
|
|
import modelJSON from './model.json'; |
|
|
|
import * as ccl from './ccl'; |
|
|
|
|
|
|
|
const charset = [' ', '0', '2', '4', '5', '8', 'A', 'D', 'G', 'H', 'J', 'K', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W', 'X', 'Y']; |
|
|
|
const charset = [' ', '0', '2', '4', '8', 'A', 'D', 'G', 'H', 'J', 'K', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W', 'X', 'Y']; |
|
|
|
let weightsData: Uint8Array; // base64 encoded weights
|
|
|
|
let model: tf.LayersModel; |
|
|
|
|
|
|
@ -269,11 +268,16 @@ async function imageFromCanvas(img: HTMLImageElement, bg: HTMLImageElement, off: |
|
|
|
const scale = th / h; |
|
|
|
|
|
|
|
const canvas = document.createElement('canvas'); |
|
|
|
const fcanvas = document.createElement('canvas'); |
|
|
|
const cw = w * scale + pw * 2; |
|
|
|
canvas.width = cw >= 300 ? 300 : cw; |
|
|
|
canvas.height = th; |
|
|
|
|
|
|
|
const ctx = canvas.getContext('2d', { willReadFrequently: true })!; |
|
|
|
fcanvas.width = 300; |
|
|
|
fcanvas.height = 80; |
|
|
|
|
|
|
|
const ctx = canvas.getContext('2d')!; |
|
|
|
const fctx = fcanvas.getContext('2d')!; // used to contain the captcha stretched to 300w
|
|
|
|
|
|
|
|
ctx.fillStyle = 'rgb(238,238,238)'; |
|
|
|
ctx.fillRect(0, 0, canvas.width, canvas.height); |
|
|
@ -296,96 +300,26 @@ async function imageFromCanvas(img: HTMLImageElement, bg: HTMLImageElement, off: |
|
|
|
); |
|
|
|
} |
|
|
|
ctx.drawImage(img, -w / 2, -h / 2, w, h); |
|
|
|
fctx.drawImage(canvas, 0, 0, 300, 80); |
|
|
|
}; |
|
|
|
|
|
|
|
if (bg && off == null) { |
|
|
|
off = await slideCaptcha(document.getElementById('t-fg')!, document.getElementById('t-bg')!, document.getElementById('t-slider') as HTMLInputElement); |
|
|
|
} |
|
|
|
draw(off || 0); |
|
|
|
return ctx.getImageData(0, 0, canvas.width, canvas.height); |
|
|
|
} |
|
|
|
|
|
|
|
// for debugging purposes
|
|
|
|
/**
 * Debugging aid: paints an `ImageData` buffer onto a temporary canvas and
 * returns an `<img>` element whose source is that canvas encoded as a data URL.
 *
 * The returned element is not attached to the document; the caller decides
 * where (or whether) to insert it.
 *
 * @param imagedata Pixel buffer to render; the canvas is sized to match it.
 * @returns A new image element showing the contents of `imagedata`.
 */
function imagedataToImage(imagedata: ImageData) {
    const canvas = document.createElement('canvas');
    const ctx = canvas.getContext('2d')!;
    canvas.width = imagedata.width;
    canvas.height = imagedata.height;
    ctx.putImageData(imagedata, 0, 0);

    const image = new Image();
    image.src = canvas.toDataURL();
    // NOTE(review): a second, unreachable `return fctx.getImageData(0, 0, 300, 80);`
    // followed this return. `fctx` is not declared in this function, so the
    // statement was removed; it looks like it belongs to imageFromCanvas — confirm.
    return image;
}
|
|
|
|
|
|
|
/**
 * Thresholds RGBA pixel data into a binary mask, one entry per pixel.
 *
 * Only the red channel (every 4th byte, starting at index 0) is inspected:
 * a pixel becomes `1` when its red value is below 128 and `0` otherwise.
 *
 * @param px Flat RGBA byte buffer (4 bytes per pixel), e.g. `ImageData.data`.
 * @returns Array of `px.length >> 2` values, each `0` or `1`.
 */
function toMonochrome(px: Uint8ClampedArray) {
    const ret = Array<number>(px.length >> 2);
    for (let i = 0; i < px.length; i += 4) {
        ret[i >> 2] = +(px[i] < 128);
    }
    return ret;
}

/**
 * Converts RGBA pixel data into normalised grey levels, one entry per pixel.
 *
 * Only the red channel (every 4th byte, starting at index 0) is read and
 * scaled from the byte range 0..255 to the float range 0..1.
 *
 * @param px Flat RGBA byte buffer (4 bytes per pixel), e.g. `ImageData.data`.
 * @returns Array of `px.length >> 2` values in `[0, 1]`.
 */
function toMonochromeFloat(px: Uint8ClampedArray) {
    const ret = Array<number>(px.length >> 2);
    for (let i = 0; i < px.length; i += 4) {
        ret[i >> 2] = px[i] / 255;
    }
    return ret;
}
|
|
|
|
|
|
|
/**
 * Picks, for every position along the last axis of `yPred`, the index of the
 * largest value and returns the result as a plain (nested) JavaScript array.
 *
 * This is only the arg-max step; collapsing repeats and dropping the blank
 * label is not done here.
 *
 * The work runs inside `tf.tidy` so the intermediate arg-max tensor does not
 * outlive the call.
 *
 * @param yPred Model output tensor. Presumably (batch, time, classes) — confirm against the model.
 * @returns Arg-max indices, synchronously read back from the backend.
 */
const greedyCTCDecode = (yPred: tf.Tensor<tf.Rank>) =>
    tf.tidy(() => {
        const bestClass = yPred.argMax(-1);
        return bestClass.arraySync();
    });
|
|
|
|
|
|
|
/**
 * Debugging aid: converts a flat per-pixel array to RGBA with `pixConv`,
 * turns the result into an `<img>` and appends it to `document.body`.
 *
 * @param pixConv Converter that is expected to fill the RGBA buffer passed as
 *                its 4th argument; its return value is not used here.
 * @param img     Per-pixel source values handed to `pixConv` unchanged.
 * @param w       Width, in pixels, of the image to create.
 * @param h       Height, in pixels, of the image to create.
 * @param t       When truthy, the element is displayed with the CSS transform
 *                `rotate(90deg) scaleY(-1)`.
 */
function imgDisp(pixConv: (f: ArrayLike<number>, w: number, h: number, s: Uint8ClampedArray) => ArrayLike<number>, img: ArrayLike<number>, w: number, h: number, t?: boolean) {
    const dt = new ImageData(w, h);
    // The converter writes straight into dt.data; nothing else reads its result.
    pixConv(img, w, h, dt.data);

    const imgres = imagedataToImage(dt);
    document.body.appendChild(imgres);
    if (t) {
        imgres.style.transform = 'rotate(90deg) scaleY(-1)';
    }
}
|
|
|
|
|
|
|
/**
 * Palette of 16 distinct RGB triples (each channel 0..255) used by the
 * debugging visualisations to tell labelled regions apart.
 */
let colors: number[][] = [
    [255, 0, 0],     // Red
    [0, 255, 0],     // Green
    [0, 0, 255],     // Blue
    [255, 255, 0],   // Yellow
    [255, 0, 255],   // Magenta
    [0, 255, 255],   // Cyan
    [128, 0, 0],     // Dark Red
    [0, 128, 0],     // Dark Green
    [0, 0, 128],     // Dark Blue
    [128, 128, 0],   // Olive
    [128, 0, 128],   // Purple
    [0, 128, 128],   // Teal
    [192, 192, 192], // Silver
    [128, 128, 128], // Gray
    [255, 165, 0],   // Orange
    [0, 128, 64]     // Dark sea green (not the CSS "MediumSeaGreen" value)
];
|
|
|
|
|
|
|
/**
 * Builds a pixel converter that paints every distinct value of a label array
 * with its own palette colour.
 *
 * Colours are handed out in palette order to values in order of first
 * appearance; once the palette is exhausted it starts again from the first
 * colour, so different values may share a colour. The assignment is rebuilt
 * on every call of the returned converter.
 *
 * @param p   Palette of RGB triples.
 * @param max Not used by the implementation; kept so existing callers keep working.
 * @returns Converter `(arr, w, h, res) => res` that writes opaque RGBA pixels
 *          (alpha 255) into `res` for each entry of `arr` and returns `res`.
 *          `w` and `h` are accepted but not read.
 * @throws TypeError from the converter when a colour is needed but `p` is empty.
 */
const monoToPalette = (p: number[][], max: number) =>
    function (arr: ArrayLike<number>, w: number, h: number, res: Uint8ClampedArray) {
        const assigned = new Map<number, number[]>();
        // Index of the next palette entry to hand out; wraps around instead of
        // copying and shifting the palette.
        let next = 0;

        for (let i = 0; i < arr.length; ++i) {
            const value = arr[i];
            let col = assigned.get(value);
            if (col === undefined) {
                if (p.length === 0)
                    throw new TypeError('monoToPalette: palette must contain at least one colour');
                col = p[next];
                next = (next + 1) % p.length;
                assigned.set(value, col);
            }
            [res[i * 4], res[i * 4 + 1], res[i * 4 + 2]] = col;
            res[i * 4 + 3] = 255;
        }
        return res;
    };
|
|
|
|
|
|
|
|
|
|
|
/**
 * Expands a single-channel image into opaque grey RGBA pixels.
 *
 * Each entry of `arr` is multiplied by 255 and written to the red, green and
 * blue bytes of the corresponding pixel in `res`; alpha is set to 255.
 * Because `res` is a `Uint8ClampedArray`, out-of-range products are clamped
 * to 0..255 on store.
 *
 * @param arr Per-pixel intensities; the x255 scaling implies a 0..1 range.
 * @param w   Accepted for signature compatibility; not read.
 * @param h   Accepted for signature compatibility; not read.
 * @param res Destination RGBA buffer, written in place.
 * @returns The same `res` buffer.
 */
function monoToRgba(arr: ArrayLike<number>, w: number, h: number, res: Uint8ClampedArray) {
    for (let i = 0; i < arr.length; ++i) {
        const base = i * 4;
        const grey = arr[i] * 255;
        res[base] = grey;
        res[base + 1] = grey;
        res[base + 2] = grey;
        res[base + 3] = 255;
    }
    return res;
}
|
|
|
|
|
|
|
function processCTCDecodedSequence(decodedSequence: number[], blankLabel = 0) { |
|
|
|
const result = []; |
|
|
|
let prevLabel = blankLabel; |
|
|
@ -411,114 +345,21 @@ async function predict(img: HTMLImageElement, bg: HTMLImageElement, off: number) |
|
|
|
const image = await imageFromCanvas(img, bg, off); |
|
|
|
if (!image) |
|
|
|
throw new Error("Failed to gen image"); |
|
|
|
const mono = toMonochrome(image.data); |
|
|
|
console.log(mono.reduce((a, b) => a + b), 0); |
|
|
|
const labels = ccl.connectedComponentLabeling(mono, image.width, image.height); |
|
|
|
const props = ccl.computeBounds(labels, image.width, image.height); |
|
|
|
|
|
|
|
const sortedByArea = Object.entries(props).sort((a, b) => a[1].area - b[1].area); |
|
|
|
const n = 8; |
|
|
|
let eightBiggest = sortedByArea.slice(0, -n); |
|
|
|
//const filtered = new Float32Array(80 * 300);
|
|
|
|
|
|
|
|
// TODO: maybe centering?
|
|
|
|
//imgDisp(monoToPalette(colors, Math.max(...new Set(labels))), labels, image.width, image.height);
|
|
|
|
|
|
|
|
for (const [label, region] of eightBiggest) { |
|
|
|
//if ((region.maxRow - region.minRow) <= 20) {
|
|
|
|
// continue;
|
|
|
|
//}
|
|
|
|
|
|
|
|
for (let y = region.minRow; y <= region.maxRow; ++y) { |
|
|
|
for (let x = region.minCol; x <= region.maxCol; ++x) { |
|
|
|
if (labels[y * image!.width + x] === +label) { |
|
|
|
labels[y * image!.width + x] = 0; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
eightBiggest = sortedByArea.slice(-n); |
|
|
|
//imgDisp(monoToPalette(colors, Math.max(...new Set(labels))), labels, image.width, image.height);
|
|
|
|
const mono = toMonochromeFloat(image.data); |
|
|
|
|
|
|
|
for (const [label, region] of eightBiggest) { |
|
|
|
if ((region.maxRow - region.minRow) > 20) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
for (let y = region.minRow; y <= region.maxRow; ++y) { |
|
|
|
for (let x = region.minCol; x <= region.maxCol; ++x) { |
|
|
|
if (labels[y * image!.width + x] === +label) { |
|
|
|
labels[y * image!.width + x] = 0; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
//imgDisp(monoToPalette(colors, Math.max(...new Set(labels))), labels, image.width, image.height);
|
|
|
|
|
|
|
|
for (const [label, region] of eightBiggest) { |
|
|
|
if ((region.maxRow - region.minRow) <= 20) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
|
for (let y = region.minRow; y <= region.maxRow; ++y) { |
|
|
|
for (let x = region.minCol; x <= region.maxCol; ++x) { |
|
|
|
if (labels[y * image!.width + x] === +label) { |
|
|
|
labels[y * image!.width + x] = 1; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
const filtered2 = tf.tensor3d(labels, [image.height, image.width, 1]).concat(tf.zeros([80, 300 - image.width, 1]), 1); |
|
|
|
//imgDisp(monoToPalette(colors, Math.max(...new Set(labels))), labels, image.width, image.height);
|
|
|
|
//const tensor = tf.tensor3d(filtered, [80, 300, 1], 'float32');
|
|
|
|
//const tr = [1, 0, 2];
|
|
|
|
//console.log(tensor.shape, tensor.transpose(tr).shape);
|
|
|
|
const filtered2 = tf.tensor3d(mono, [image.height, image.width, 1]); |
|
|
|
const prediction = model.predict(filtered2.transpose([1, 0, 2]).expandDims(0)); |
|
|
|
|
|
|
|
let d: tf.TypedArray; |
|
|
|
|
|
|
|
if (!Array.isArray(prediction)) { |
|
|
|
const v = greedyCTCDecode(prediction) as number[][]; |
|
|
|
console.log(v); |
|
|
|
const s = processCTCDecodedSequence(v[0], charset.length + 1); |
|
|
|
return indicesToSymbols(s).join('').trim(); |
|
|
|
} else |
|
|
|
throw new Error("unexpected inference"); |
|
|
|
if (Array.isArray(prediction)) |
|
|
|
throw new Error("Unexpected inference type"); |
|
|
|
|
|
|
|
// createSequence(d);
|
|
|
|
return ''; |
|
|
|
const v = greedyCTCDecode(prediction) as number[][]; |
|
|
|
const s = processCTCDecodedSequence(v[0], charset.length + 1); |
|
|
|
return indicesToSymbols(s).join('').trim(); |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Splits a flat prediction vector into consecutive windows of
 * `charset.length` scores and, for each window, keeps the candidates whose
 * score is at least 5% of that window's maximum.
 *
 * @param prediction Flat, sliceable sequence of scores; consumed in steps of
 *                   `charset.length`.
 * @returns One record per window mapping a character to its score relative
 *          to the window maximum (so the best candidate maps to 1).
 */
function createSequence(prediction: any) {
    const csl = charset.length;
    const sequence: Record<string, number>[] = [];

    // one window of `csl` scores per predicted position
    for (let pos = 0; pos < prediction.length; pos += csl) {
        const preds = prediction.slice(pos, pos + csl);
        const max = Math.max(...preds);
        const seqElem: Record<string, number> = {};

        for (let i = 0; i < csl; i++) {
            const p = preds[i] / max; // score relative to the best candidate in this window

            // Score i is looked up at charset[i + 1]; for the last score that
            // index is past the end of charset, so it is stored under ''.
            // NOTE(review): presumably the last class is the CTC blank — confirm.
            const c = charset[i + 1];

            if (p >= 0.05) { // probable enough to offer as an alternative solution
                seqElem[c || ''] = p;
            }
        }

        sequence.push(seqElem);
    }

    return sequence;
}
|
|
|
|
|
|
|
|
|
|
|
async function imageFromUri(uri: string) { |
|
|
|
if (uri.startsWith('url("')) { |
|
|
|
uri = uri.substr(5, uri.length - 7); |
|
|
|