From 2a22e44b233b9d82d11a125db6f05091d9cb5f72 Mon Sep 17 00:00:00 2001
From: coomdev <coomer@coom.tech>
Date: Thu, 3 Aug 2023 11:27:44 +0200
Subject: [PATCH] accidentally left a debug thing

---
 src/main.js | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/main.js b/src/main.js
index b0b64c8..17d011f 100644
--- a/src/main.js
+++ b/src/main.js
@@ -145,7 +145,6 @@ function imageFromCanvas (img, bg, off) {
   const scale = th / h
 
   const canvas = document.createElement('canvas')
-  document.body.append(canvas)
   canvas.height = w * scale + pw * 2
   canvas.width = th
 
@@ -158,7 +157,7 @@ function imageFromCanvas (img, bg, off) {
   ctx.scale(-scale, scale)
   ctx.rotate((90 * Math.PI) / 180)
 
-  const adf = 0.3
+  const adf = 1 / 3
 
   const draw = function (off, adj) {
     if (adj) {
@@ -216,7 +215,6 @@ function imageFromCanvas (img, bg, off) {
         imgdata = ctx.getImageData(0, 0, canvas.width, canvas.height)
         bestImagedata = imgdata
         bestOff = off
-        console.log(off)
       }
     }
 
@@ -260,8 +258,16 @@ async function predict (img, bg, off) {
     .fromPixels(image, 1)
     .mul(-1 / 238)
     .add(1)
-  const prediction = await model.predict(tensor.expandDims(0)).data()
 
+  // the image is rotated 90 degrees because that makes the pixels read by
+  // each invocation contiguous in memory, which improves performance.
+  // the model thus reads the image left to right, and because of
+  // some conv layers, the total width ends up divided by 4.
+  // for each line read, it emits 22 predictions, one per possible captcha
+  // character, including an "empty" token for when nothing probable was found
+  const prediction = await model.predict(tensor.expandDims(0)).data()
+  // since the image is read from left to right, the results are also written left to right;
+  // the solution is the sequence of the most probable non-empty characters, in that order
   return createSequence(prediction)
 }
 
@@ -269,18 +275,20 @@ function createSequence (prediction) {
   const csl = charsetJSON.charset.length
   const sequence = []
 
+  // for each prediction
   for (let pos = 0; pos < prediction.length; pos += csl) {
+    // look at the probabilities for the 22 token characters
     const preds = prediction.slice(pos, pos + csl)
     const max = Math.max(...preds)
 
     const seqElem = {}
 
     for (let i = 0; i < csl; i++) {
-      const p = preds[i] / max
+      const p = preds[i] / max // normalize relative to the most probable token
       const c = charsetJSON.charset[i + 1]
 
-      if (p >= 0.05) {
-        seqElem[c || ''] = p
+      if (p >= 0.05) { // if it's probable enough
+        seqElem[c || ''] = p // save its probability, to give alternative solutions
       }
     }