Browse Source

Remove pre-processing; use a newer model trained on images without pre-processing

master
coomdev 9 months ago
parent
commit
e44c515780
  1. 627
      model_gen/captcha_ocr.ipynb
  2. 4
      src/group1-shard1of1.bin
  3. 195
      src/main.ts
  4. 2
      src/model.json
  5. 3
      src/model.weights.bin

627
model_gen/captcha_ocr.ipynb

File diff suppressed because one or more lines are too long

4
src/group1-shard1of1.bin

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f673767adc061648864cf142f2854df49c04ba0f717da152130cb007627e150d
size 1863520
oid sha256:8e7624ac711c06c8801ae29cd527bf908c9a9c94572d969aa297a7788fc8b6b3
size 1863004

195
src/main.ts

@ -1,9 +1,8 @@
import * as tf from '@tensorflow/tfjs';
import { setWasmPaths } from '@tensorflow/tfjs-backend-wasm';
import modelJSON from './model.json';
import * as ccl from './ccl';
const charset = [' ', '0', '2', '4', '5', '8', 'A', 'D', 'G', 'H', 'J', 'K', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W', 'X', 'Y'];
const charset = [' ', '0', '2', '4', '8', 'A', 'D', 'G', 'H', 'J', 'K', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W', 'X', 'Y'];
let weightsData: Uint8Array; // base64 encoded weights
let model: tf.LayersModel;
@ -269,11 +268,16 @@ async function imageFromCanvas(img: HTMLImageElement, bg: HTMLImageElement, off:
const scale = th / h;
const canvas = document.createElement('canvas');
const fcanvas = document.createElement('canvas');
const cw = w * scale + pw * 2;
canvas.width = cw >= 300 ? 300 : cw;
canvas.height = th;
const ctx = canvas.getContext('2d', { willReadFrequently: true })!;
fcanvas.width = 300;
fcanvas.height = 80;
const ctx = canvas.getContext('2d')!;
const fctx = fcanvas.getContext('2d')!; // used to contain the captcha stretched to 300w
ctx.fillStyle = 'rgb(238,238,238)';
ctx.fillRect(0, 0, canvas.width, canvas.height);
@ -296,96 +300,26 @@ async function imageFromCanvas(img: HTMLImageElement, bg: HTMLImageElement, off:
);
}
ctx.drawImage(img, -w / 2, -h / 2, w, h);
fctx.drawImage(canvas, 0, 0, 300, 80);
};
if (bg && off == null) {
off = await slideCaptcha(document.getElementById('t-fg')!, document.getElementById('t-bg')!, document.getElementById('t-slider') as HTMLInputElement);
}
draw(off || 0);
return ctx.getImageData(0, 0, canvas.width, canvas.height);
}
// for debugging purposes
function imagedataToImage(imagedata: ImageData) {
const canvas = document.createElement('canvas');
const ctx = canvas.getContext('2d')!;
canvas.width = imagedata.width;
canvas.height = imagedata.height;
ctx.putImageData(imagedata, 0, 0);
const image = new Image();
image.src = canvas.toDataURL();
return image;
return fctx.getImageData(0, 0, 300, 80);
}
function toMonochrome(px: Uint8ClampedArray) {
function toMonochromeFloat(px: Uint8ClampedArray) {
const ret = Array<number>(px.length >> 2);
for (let i = 0; i < px.length; i += 4) {
ret[i >> 2] = +(px[i] < 128);
ret[i >> 2] = px[i] / 255;
}
return ret;
}
/**
 * Greedy decoding step: takes the index of the largest value along the last
 * axis of `yPred` and returns the result as plain (nested) JavaScript arrays.
 *
 * The work runs inside `tf.tidy`, so the intermediate argMax tensor is
 * released once the values have been read back synchronously.
 */
const greedyCTCDecode = (yPred: tf.Tensor<tf.Rank>) =>
    tf.tidy(() => {
        const bestIndices = yPred.argMax(-1);
        return bestIndices.arraySync();
    });
/**
 * Debugging aid: renders a flat pixel buffer as an `<img>` appended to the page.
 *
 * @param pixConv converter that is handed the source values, the dimensions and
 *                the RGBA buffer of a freshly created `ImageData`; it is expected
 *                to fill that buffer (its return value is not used here)
 * @param img     source values passed through to `pixConv`
 * @param w       width of the created `ImageData`
 * @param h       height of the created `ImageData`
 * @param t       when truthy, the displayed image gets the CSS transform
 *                `rotate(90deg) scaleY(-1)`
 */
function imgDisp(pixConv: (f: ArrayLike<number>, w: number, h: number, s: Uint8ClampedArray) => ArrayLike<number>, img: ArrayLike<number>, w: number, h: number, t?: boolean) {
    const frame = new ImageData(w, h);
    // The converter receives the frame's own backing buffer as its output target.
    pixConv(img, w, h, frame.data);

    const preview = imagedataToImage(frame);
    document.body.appendChild(preview);

    if (!t) {
        return;
    }
    preview.style.transform = 'rotate(90deg) scaleY(-1)';
}
// Palette of 16 RGB triples ([red, green, blue], each channel 0-255).
// It is passed to monoToPalette in the (commented-out) debug imgDisp calls,
// where each distinct label value is drawn in one of these colours.
let colors = [
[255, 0, 0], // Red
[0, 255, 0], // Green
[0, 0, 255], // Blue
[255, 255, 0], // Yellow
[255, 0, 255], // Magenta
[0, 255, 255], // Cyan
[128, 0, 0], // Dark Red
[0, 128, 0], // Dark Green
[0, 0, 128], // Dark Blue
[128, 128, 0], // Olive
[128, 0, 128], // Purple
[0, 128, 128], // Teal
[192, 192, 192], // Silver
[128, 128, 128], // Gray
[255, 165, 0], // Orange
[0, 128, 64] // Medium Sea Green
];
/**
 * Builds a pixel converter that paints every distinct input value with its own
 * palette colour.
 *
 * Colours are handed out in palette order the first time a value is seen; once
 * the palette is exhausted it starts over from the first entry, so colours may
 * repeat. Assignments are local to one invocation of the returned converter.
 *
 * @param p   palette of RGB triples
 * @param max not used by this implementation
 * @returns   a converter `(arr, w, h, res)` that writes opaque RGBA pixels for
 *            each element of `arr` into `res` and returns `res` (`w` and `h`
 *            are not used)
 */
const monoToPalette = (p: number[][], max: number) =>
    (arr: ArrayLike<number>, w: number, h: number, res: Uint8ClampedArray) => {
        const assigned = new Map<number, number[]>();
        let pool = p.slice();

        for (let px = 0, base = 0; px < arr.length; ++px, base += 4) {
            const label = arr[px];
            let rgb: number[];

            if (!assigned.has(label)) {
                // First occurrence of this value: take the next unused colour.
                rgb = pool.shift()!;
                assigned.set(label, rgb);
                if (pool.length == 0) {
                    // Palette used up; begin cycling through it again.
                    pool = p.slice();
                }
            } else {
                rgb = assigned.get(label)!;
            }

            res[base] = rgb[0];
            res[base + 1] = rgb[1];
            res[base + 2] = rgb[2];
            res[base + 3] = 255; // fully opaque
        }

        return res;
    };
/**
 * Expands single-channel values into opaque grey RGBA pixels.
 *
 * Each element of `arr` is multiplied by 255 and stored in the red, green and
 * blue bytes of the corresponding pixel in `res`; alpha is set to 255.
 *
 * @param arr single-channel source values, one per pixel
 * @param w   not used by this implementation
 * @param h   not used by this implementation
 * @param res destination RGBA buffer (four bytes per element of `arr`)
 * @returns   `res`, after it has been filled
 */
function monoToRgba(arr: ArrayLike<number>, w: number, h: number, res: Uint8ClampedArray) {
    let base = 0;
    for (let px = 0; px < arr.length; ++px) {
        const level = arr[px] * 255;
        res[base] = level;
        res[base + 1] = level;
        res[base + 2] = level;
        res[base + 3] = 255;
        base += 4;
    }
    return res;
}
function processCTCDecodedSequence(decodedSequence: number[], blankLabel = 0) {
const result = [];
let prevLabel = blankLabel;
@ -411,114 +345,21 @@ async function predict(img: HTMLImageElement, bg: HTMLImageElement, off: number)
const image = await imageFromCanvas(img, bg, off);
if (!image)
throw new Error("Failed to gen image");
const mono = toMonochrome(image.data);
console.log(mono.reduce((a, b) => a + b), 0);
const labels = ccl.connectedComponentLabeling(mono, image.width, image.height);
const props = ccl.computeBounds(labels, image.width, image.height);
const sortedByArea = Object.entries(props).sort((a, b) => a[1].area - b[1].area);
const n = 8;
let eightBiggest = sortedByArea.slice(0, -n);
//const filtered = new Float32Array(80 * 300);
// TODO: maybe centering?
//imgDisp(monoToPalette(colors, Math.max(...new Set(labels))), labels, image.width, image.height);
for (const [label, region] of eightBiggest) {
//if ((region.maxRow - region.minRow) <= 20) {
// continue;
//}
for (let y = region.minRow; y <= region.maxRow; ++y) {
for (let x = region.minCol; x <= region.maxCol; ++x) {
if (labels[y * image!.width + x] === +label) {
labels[y * image!.width + x] = 0;
}
}
}
}
eightBiggest = sortedByArea.slice(-n);
//imgDisp(monoToPalette(colors, Math.max(...new Set(labels))), labels, image.width, image.height);
const mono = toMonochromeFloat(image.data);
for (const [label, region] of eightBiggest) {
if ((region.maxRow - region.minRow) > 20) {
continue;
}
for (let y = region.minRow; y <= region.maxRow; ++y) {
for (let x = region.minCol; x <= region.maxCol; ++x) {
if (labels[y * image!.width + x] === +label) {
labels[y * image!.width + x] = 0;
}
}
}
}
//imgDisp(monoToPalette(colors, Math.max(...new Set(labels))), labels, image.width, image.height);
for (const [label, region] of eightBiggest) {
if ((region.maxRow - region.minRow) <= 20) {
continue;
}
for (let y = region.minRow; y <= region.maxRow; ++y) {
for (let x = region.minCol; x <= region.maxCol; ++x) {
if (labels[y * image!.width + x] === +label) {
labels[y * image!.width + x] = 1;
}
}
}
}
const filtered2 = tf.tensor3d(labels, [image.height, image.width, 1]).concat(tf.zeros([80, 300 - image.width, 1]), 1);
//imgDisp(monoToPalette(colors, Math.max(...new Set(labels))), labels, image.width, image.height);
//const tensor = tf.tensor3d(filtered, [80, 300, 1], 'float32');
//const tr = [1, 0, 2];
//console.log(tensor.shape, tensor.transpose(tr).shape);
const filtered2 = tf.tensor3d(mono, [image.height, image.width, 1]);
const prediction = model.predict(filtered2.transpose([1, 0, 2]).expandDims(0));
let d: tf.TypedArray;
if (!Array.isArray(prediction)) {
const v = greedyCTCDecode(prediction) as number[][];
console.log(v);
const s = processCTCDecodedSequence(v[0], charset.length + 1);
return indicesToSymbols(s).join('').trim();
} else
throw new Error("unexpected inference");
if (Array.isArray(prediction))
throw new Error("Unexpected inference type");
// createSequence(d);
return '';
const v = greedyCTCDecode(prediction) as number[][];
const s = processCTCDecodedSequence(v[0], charset.length + 1);
return indicesToSymbols(s).join('').trim();
}
/**
 * Splits a flat prediction buffer into consecutive windows of
 * `charset.length` values and, for each window, collects the symbols whose
 * value is at least 5% of that window's maximum.
 *
 * @param prediction flat, sliceable sequence of scores
 * @returns one record per window mapping a symbol to its score relative to the
 *          window maximum; symbols are looked up as `charset[i + 1]`, with the
 *          empty string used when that lookup yields nothing
 */
function createSequence(prediction: any) {
    const stride = charset.length;
    const steps: Record<string, number>[] = [];

    let start = 0;
    while (start < prediction.length) {
        // Scores belonging to the current window.
        const slot = prediction.slice(start, start + stride);
        const peak = Math.max(...slot);
        const candidates: Record<string, number> = {};

        for (let k = 0; k < stride; k++) {
            const relative = slot[k] / peak; // score relative to the window's best
            if (relative >= 0.05) {
                // Likely enough: keep it so alternative readings stay available.
                const symbol = charset[k + 1];
                candidates[symbol || ''] = relative;
            }
        }

        steps.push(candidates);
        start += stride;
    }

    return steps;
}
async function imageFromUri(uri: string) {
if (uri.startsWith('url("')) {
uri = uri.substr(5, uri.length - 7);

2
src/model.json

File diff suppressed because one or more lines are too long

3
src/model.weights.bin

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b3d1959699ed9381dc680dcb6676e90670c6841c2b97fcf696ff6c2db261e1f5
size 15704896
Loading…
Cancel
Save