use the sharty's slider algorithm

Use Yukariin's better model
truncate at 300 width
8 changed files with 1063 additions and 912 deletions
--- a/build-chrome.js
+++ b/build-chrome.js
@ -45,7 +45,7 @@ const manif3 = {
  const lmanif = manif3;

  res = await esbuild.build({
-    entryPoints: ["src/main.js"],
+    entryPoints: ["src/main.ts"],
    bundle: true,
    treeShaking: true,
    outdir: "./chrome/dist",
@ -59,7 +59,7 @@ const manif3 = {
    },
    // inject: ["./esbuild.inject.js"],
    plugins: [],
-    loader: { ".bin": "text", ".wasm": "binary" }, // Hacky solution for loading model.weights.bin but it works for now.
+    loader: { ".bin": "binary", ".wasm": "binary" }, // Hacky solution for loading model.weights.bin but it works for now.
    metafile: true,
  });

@ -97,7 +97,7 @@ const manif3 = {
  writeFileSync("./chrome/manifest.json", JSON.stringify(lmanif, null, 2));
  copyFileSync("./logo.png", "./chrome/1449696017588.png");

-  copyFileSync("./src/model.weights.bin", "./chrome/model.weights.bin");
+  copyFileSync("./src/group1-shard1of1.bin", "./chrome/group1-shard1of1.bin");
  copyFileSync(
    "./src/tfjs-backend-wasm-threaded-simd.wasm",
    "./chrome/tfjs-backend-wasm-threaded-simd.wasm"
--- a/build-ff.js
+++ b/build-ff.js
@ -50,7 +50,7 @@ const manif = {
  let res;

  res = await esbuild.build({
-    entryPoints: ["src/main.js"],
+    entryPoints: ["src/main.ts"],
    bundle: true,
    treeShaking: true,
    outdir: "./firefox/dist",
@ -64,7 +64,7 @@ const manif = {
    },
    // inject: ["./esbuild.inject.js"],
    plugins: [],
-    loader: { ".bin": "text", ".wasm": "binary" },
+    loader: { ".bin": "binary", ".wasm": "binary" },
    metafile: true,
  });

@ -102,7 +102,7 @@ const manif = {
  writeFileSync("./firefox/manifest.json", JSON.stringify(manif, null, 2));
  copyFileSync("./logo.png", "./firefox/1449696017588.png");

-  copyFileSync("./src/model.weights.bin", "./firefox/model.weights.bin");
+  copyFileSync("./src/group1-shard1of1.bin", "./firefox/group1-shard1of1.bin");
  copyFileSync(
    "./src/tfjs-backend-wasm-threaded-simd.wasm",
    "./firefox/tfjs-backend-wasm-threaded-simd.wasm"
--- a/model_gen/captcha_75_25.h5
+++ b/model_gen/captcha_75_25.h5
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38fe538eaa6f78c6d64cfa7af477d6f6680265c3faa97386f921131e436e215a
-size 47170376
+oid sha256:61e6c4d5689ecaf2056fa81f7e02163371dbdace2dba0527069fe21cb5bf98c5
+size 791405
--- a/model_gen/captcha_ocr.ipynb
+++ b/model_gen/captcha_ocr.ipynb
--- a/model_gen/penis.py
+++ b/model_gen/penis.py
@ -0,0 +1,47 @@
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+import sys
+
+from pathlib import Path
+from collections import Counter
+
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+
+class CTCLayer(layers.Layer):  # Keras layer that registers a CTC loss and returns its predictions unchanged
+    def __init__(self, name=None):
+        super().__init__(name=name)
+        self.loss_fn = keras.backend.ctc_batch_cost  # loss function applied in call()
+
+    def call(self, y_true, y_pred):
+        # Compute the training-time loss value and add it
+        # to the layer using `self.add_loss()`.
+        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")  # size of axis 0 of y_true (the batch)
+        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")  # size of axis 1 of y_pred
+        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")  # size of axis 1 of y_true
+
+        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")  # same length repeated per sample, shape (batch_len, 1)
+        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")  # same length repeated per sample, shape (batch_len, 1)
+
+        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
+        self.add_loss(loss)
+
+        # The predictions are returned untouched; the layer only contributes
+        # the loss registered above.
+        return y_pred
+
+
+print("loading");
+model = tf.keras.saving.load_model("captcha_75_25.tf", custom_objects={'CTCLayer': CTCLayer});  # custom_objects maps the saved layer name to the class defined above
+
+print("extracting");
+pmodel = keras.models.Model(  # sub-model from the "image" layer's input to the "dense2" layer's output
+    model.get_layer(name="image").input, model.get_layer(name="dense2").output
+)  # anything after "dense2" is left out - presumably the CTC loss layer; TODO confirm
+print("saving");
+
+tf.keras.saving.save_model(pmodel, "pcaptcha_75_25.keras")  # the same sub-model is written under two file names
+tf.keras.saving.save_model(pmodel, "pcaptcha_75_25.h5")
+
+print("done");
--- a/src/group1-shard1of1.bin
+++ b/src/group1-shard1of1.bin
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3d1959699ed9381dc680dcb6676e90670c6841c2b97fcf696ff6c2db261e1f5
-size 15704896
+oid sha256:f673767adc061648864cf142f2854df49c04ba0f717da152130cb007627e150d
+size 1863520
--- a/src/main.ts
+++ b/src/main.ts
@ -124,8 +124,143 @@ function calculateDisorder(imgdata: ImageData) {
  return res / (total === 0 ? 1 : total);
 }

+/*
+ * Decide whether a pixel is closer to black or to white.
+ *
+ * The three channel values are summed and compared against 384, which is
+ * about half of the largest possible sum (3 * 255 = 765) when the channels
+ * are 8-bit values. Only the sum matters, so the order in which the channels
+ * are passed is irrelevant.
+ *
+ * Returns 0 for white (sum above 384) and 1 for black (sum of 384 or less).
+ * A sum that is not a number (e.g. built from an out-of-range array read)
+ * fails the comparison and is therefore classified as 1 (black).
+ */
+function pxlBlackOrWhite(r: number, g: number, b: number) {
+  return (r + g + b > 384) ? 0 : 1;
+}
+
+/*
+ * Get bordering pixels of transparent areas (the outline of the circles)
+ * and return their coordinates with the neighboring color.
+ *
+ * The pixel buffer is walked backwards, one pixel (4 bytes) per step,
+ * starting at the last byte of the buffer, so with 4 bytes per pixel `i`
+ * always indexes an alpha byte. `opq` remembers the opacity of the run being
+ * walked (it starts out as opaque); a boundary is recorded whenever the
+ * current pixel's opacity differs from it, unless the next pixel to be
+ * visited (4 bytes earlier) already matches `opq` again, in which case the
+ * 1-pixel-wide run is skipped.
+ *
+ * Each entry pushed is [x, y, clr]: the coordinates of the transparent pixel
+ * at the boundary and the black/white class (see pxlBlackOrWhite) of the
+ * opaque pixel horizontally adjacent to it.
+ *
+ * NOTE(review): `cl` and `cr` are incremented but never read in this
+ * function. The walk does not stop at row ends, so a boundary found at the
+ * edge of a row is paired with a pixel from the neighboring row - confirm
+ * that this is acceptable for the captcha images.
+ */
+function getBoundries(imgdata: ImageData) {
+  const data = imgdata.data;
+  const width = imgdata.width;
+
+  let i = data.length - 1;
+  let cl = 0;
+  let cr = 0;
+  const chkArray = [];
+  let opq = true;
+  while (i > 0) {
+    // alpha channel above 128 is assumed opaque
+    const a = data[i] > 128;
+    if (a !== opq) {
+      if ((data[i - 4] > 128) === opq) {
+        // ignore just 1-width areas
+        i -= 4;
+        continue;
+      }
+      if (a) {
+        /* transparent pixel to its right */
+        /*
+                // set to color blue (for debugging)
+                data[i + 4] = 255;
+                data[i + 3] = 255;
+                data[i + 2] = 0;
+                data[i + 1] = 0;
+                */
+        const pos = (i + 1) / 4;
+        const x = pos % width;
+        const y = (pos - x) / width;
+        // 1: black, 0: white
+        const clr = pxlBlackOrWhite(data[i - 1], data[i - 2], data[i - 3]);
+        chkArray.push([x, y, clr]);
+        cr += 1;
+      } else {
+        /* opaque pixel to its right */
+        /*
+                // set to color red (for debugging)
+                data[i] = 255;
+                data[i - 1] = 0;
+                data[i - 2] = 0;
+                data[i - 3] = 255;
+                */
+        const pos = (i - 3) / 4;
+        const x = pos % width;
+        const y = (pos - x) / width;
+        // 1: black, 0: white
+        const clr = pxlBlackOrWhite(data[i + 1], data[i + 2], data[i + 3]);
+        chkArray.push([x, y, clr]);
+        cl += 1;
+      }
+      opq = a;
+    }
+    i -= 4;
+  }
+  return chkArray;
+}
+/*
+ * Slide the background image and compare the colors of the border pixels in
+ * chkArray; the position with the most matches wins.
+ *
+ * bgdata:     pixels of the background image (the one that gets slid).
+ * chkArray:   [x, y, clr] entries; clr is 1 for black and 0 for white.
+ * slideWidth: largest horizontal shift to try, in pixels; every shift from
+ *             0 to slideWidth inclusive is tested.
+ *
+ * For each shift s, the background pixel at (x + s, y) is classified with
+ * pxlBlackOrWhite and counted as a match when it equals clr. On ties the
+ * smallest shift is kept, because a later shift must score strictly higher
+ * to replace it.
+ *
+ * Returns the best shift as a slider percentage (bestPos / slideWidth * 100).
+ *
+ * NOTE(review): a slideWidth of 0 makes the return value 0 / 0 = NaN, and
+ * reads past the end of the pixel buffer are classified as black - confirm
+ * that callers never hit either case.
+ */
+function getBestPos(bgdata: ImageData, chkArray: number[][], slideWidth: number) {
+  const data = bgdata.data;
+  const width = bgdata.width;
+  let bestSimilarity = 0;
+  let bestPos = 0;
+
+  for (let s = 0; s <= slideWidth; s += 1) {
+    let similarity = 0;
+    const amount = chkArray.length;
+    for (let p = 0; p < amount; p += 1) {
+      const chk = chkArray[p];
+      const x = chk[0] + s;
+      const y = chk[1];
+      const clr = chk[2];
+      const off = (y * width + x) * 4;
+      const bgclr = pxlBlackOrWhite(data[off], data[off + 1], data[off + 2]);
+      if (bgclr === clr) {
+        similarity += 1;
+      }
+    }
+    if (similarity > bestSimilarity) {
+      bestSimilarity = similarity;
+      bestPos = s;
+    }
+  }
+  return bestPos / slideWidth * 100;
+}
+
+async function getImageDataFromURI(uri: string) { // resolves to the pixels of the image loaded from `uri`
+  const image = await imageFromUri(uri);
+  if (!image)
+    throw new Error("No image"); // imageFromUri (defined elsewhere) produced a falsy value
+  const canvas = document.createElement('canvas'); // scratch canvas sized to the image; not attached to the page here
+  canvas.width = image.width;
+  canvas.height = image.height;
+  const ctx = canvas.getContext('2d')!; // non-null assertion: a null context is not handled
+  ctx.drawImage(image, 0, 0);
+  return ctx.getImageData(0, 0, canvas.width, canvas.height);
+}
+
+async function slideCaptcha(tfgElement: HTMLElement, tbgElement: HTMLElement, sliderElement: HTMLInputElement) {
+  // get data uris for captcha back- and foreground; slice(5, -2) drops 5 leading and 2 trailing characters, presumably the url(" and ") wrapper - TODO confirm
+  const tbgUri = tbgElement.style.backgroundImage.slice(5, -2);
+  const tfgUri = tfgElement.style.backgroundImage.slice(5, -2);

+  // load foreground (image with holes)
+  const igd = await getImageDataFromURI(tfgUri);
+  // get array with pixels of foreground
+  // that we compare to background
+  const chkArray = getBoundries(igd);
+  // load background (image that gets slid)
+  const sigd = await getImageDataFromURI(tbgUri);
+  const slideWidth = sigd.width - igd.width; // width difference between the two images; assumes the background is the wider one - TODO confirm
+  // slide, compare and get best matching position (returned as a slider percentage)
+  const sliderPos = getBestPos(sigd, chkArray, slideWidth);
+  // slide in the UI: write the percentage into the slider and fire an input event on it
+  sliderElement.value = '' + sliderPos;
+  (sliderElement as any).dispatchEvent(new Event('input'), { bubbles: true }); // NOTE(review): dispatchEvent takes a single argument, so { bubbles: true } is ignored; it likely belongs in the Event constructor
+  return 0 - (sliderPos / 2); // negated half of the slider percentage
+}
+
 // returns ImageData from captcha's background image, foreground image, and offset (ranging from 0 to -50)
-function imageFromCanvas(img: HTMLImageElement, bg: HTMLImageElement, off: number) {
+async function imageFromCanvas(img: HTMLImageElement, bg: HTMLImageElement, off: number | null) {
  const h = img.height;
  const w = img.width;
  const th = 80;
@ -134,7 +269,8 @@ function imageFromCanvas(img: HTMLImageElement, bg: HTMLImageElement, off: numbe
  const scale = th / h;

  const canvas = document.createElement('canvas');
-  canvas.width = w * scale + pw * 2;
+  const cw = w * scale + pw * 2;
+  canvas.width = cw >= 300 ? 300 : cw;
  canvas.height = th;

  const ctx = canvas.getContext('2d', { willReadFrequently: true })!;
@ -162,44 +298,11 @@ function imageFromCanvas(img: HTMLImageElement, bg: HTMLImageElement, off: numbe
    ctx.drawImage(img, -w / 2, -h / 2, w, h);
  };

-  // if off is not specified and background image is present, try to figure out
-  // the best offset automatically; select the offset that has smallest value of
-  // calculateDisorder for the resulting image
  if (bg && off == null) {
-    let bestDisorder = 999;
-    let bestImagedata: ImageData | null = null;
-    let bestOff = -1;
-
-    for (let off = 0; off >= -50; off--) {
-      draw(off);
-
-      let imgdata = ctx.getImageData(0, 0, canvas.width, canvas.height);
-      const disorder = calculateDisorder(imgdata);
-
-      if (disorder < bestDisorder) {
-        bestDisorder = disorder;
-        draw(off);
-        imgdata = ctx.getImageData(0, 0, canvas.width, canvas.height);
-        bestImagedata = imgdata;
-        bestOff = off;
-      }
-    }
-
-    // not the best idea to do this here
-    setTimeout(function () {
-      const bg = document.getElementById('t-bg');
-      const slider = document.getElementById('t-slider') as HTMLInputElement;
-      if (!bg || !slider) return;
-
-      slider.value = '' + (-bestOff * 2);
-      bg.style.backgroundPositionX = bestOff + 'px';
-    }, 1);
-    draw(bestOff);
-    return bestImagedata;
-  } else {
-    draw(off);
-    return ctx.getImageData(0, 0, canvas.width, canvas.height);
+    off = await slideCaptcha(document.getElementById('t-fg')!, document.getElementById('t-bg')!, document.getElementById('t-slider') as HTMLInputElement);
  }
+  draw(off || 0);
+  return ctx.getImageData(0, 0, canvas.width, canvas.height);
 }

 // for debugging purposes
@ -305,7 +408,7 @@ async function predict(img: HTMLImageElement, bg: HTMLImageElement, off: number)
  if (!model) {
    model = await load();
  }
-  const image = imageFromCanvas(img, bg, off);
+  const image = await imageFromCanvas(img, bg, off);
  if (!image)
    throw new Error("Failed to gen image");
  const mono = toMonochrome(image.data);
--- a/src/model.json
+++ b/src/model.json
Author	SHA1	Message	Date
coomdev	fec1b4a896	use the sharty's slider algorithm	9 months ago
coomdev	35384d9fd8	Use Yukariin's better model	9 months ago
coomdev	8e4533ad16	truncate at 300 width	9 months ago
coomdev	1cc281e964	adjust webext build scripts	9 months ago