From 2a22e44b233b9d82d11a125db6f05091d9cb5f72 Mon Sep 17 00:00:00 2001 From: coomdev Date: Thu, 3 Aug 2023 11:27:44 +0200 Subject: [PATCH] accidentally left a debug thing --- src/main.js | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/main.js b/src/main.js index b0b64c8..17d011f 100644 --- a/src/main.js +++ b/src/main.js @@ -145,7 +145,6 @@ function imageFromCanvas (img, bg, off) { const scale = th / h const canvas = document.createElement('canvas') - document.body.append(canvas) canvas.height = w * scale + pw * 2 canvas.width = th @@ -158,7 +157,7 @@ function imageFromCanvas (img, bg, off) { ctx.scale(-scale, scale) ctx.rotate((90 * Math.PI) / 180) - const adf = 0.3 + const adf = 1 / 3 const draw = function (off, adj) { if (adj) { @@ -216,7 +215,6 @@ function imageFromCanvas (img, bg, off) { imgdata = ctx.getImageData(0, 0, canvas.width, canvas.height) bestImagedata = imgdata bestOff = off - console.log(off) } } @@ -260,8 +258,16 @@ async function predict (img, bg, off) { .fromPixels(image, 1) .mul(-1 / 238) .add(1) - const prediction = await model.predict(tensor.expandDims(0)).data() + // the image is rotated 90 degrees because it makes + // the pixels read by each invocation contiguous in memory, increasing performance + // model thus reads image left to right, and because of + // some conv layers, the total width ends up divided by 4 + // for each line read, it emits 22 predictions, one for each captcha character, + // including an "empty" token if nothing probable was found + const prediction = await model.predict(tensor.expandDims(0)).data() + // since it's read from left to right, the results are also written from left to right + // the solution is the sequence of most probable non-empty character from left to right return createSequence(prediction) } @@ -269,18 +275,20 @@ function createSequence (prediction) { const csl = charsetJSON.charset.length const sequence = [] + // for each prediction for (let pos = 0; pos < prediction.length; pos += csl) { + // look at the probabilities for the 22 token characters const preds = prediction.slice(pos, pos + csl) const max = Math.max(...preds) const seqElem = {} for (let i = 0; i < csl; i++) { - const p = preds[i] / max + const p = preds[i] / max // normalize probability const c = charsetJSON.charset[i + 1] - if (p >= 0.05) { - seqElem[c || ''] = p + if (p >= 0.05) { // if it's probable enough + seqElem[c || ''] = p // save its probability, to give alternative solutions } }