Revised patch: adds connected-component labeling (src/ccl.js) and uses it in
predict() of src/main.js.
NOTE(review): blank context lines, the indentation of context/removed lines and
the blob ids on the "index" lines were reconstructed or carried over from a
whitespace-collapsed copy of the patch; regenerate it from a working tree
before applying.

diff --git a/src/ccl.js b/src/ccl.js
new file mode 100644
index 0000000..42a78a3
--- /dev/null
+++ b/src/ccl.js
@@ -0,0 +1,117 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.computeBounds = exports.connectedComponentLabeling = void 0;
+/**
+ * Labels the 4-connected foreground regions of a row-major binary image with
+ * a two-pass scan: provisional labels first, then equivalence resolution.
+ *
+ * @param {ArrayLike<number>} binaryImage One entry per pixel (width * height
+ *   in total); any value other than 0 is foreground.
+ * @param {number} width Pixels per row.
+ * @param {number} height Number of rows.
+ * @returns {number[]} One label per entry of binaryImage: 0 for background,
+ *   otherwise the smallest provisional label of the pixel's component.
+ */
+function connectedComponentLabeling(binaryImage, width, height) {
+    var labels = Array(binaryImage.length).fill(0);
+    // parent[label] is the union-find parent of a provisional label; a root
+    // is its own parent and is always the smallest label of its set.
+    // Index 0 is a placeholder because label 0 means background.
+    var parent = [0];
+    function find(label) {
+        var root = label;
+        while (parent[root] !== root) {
+            root = parent[root];
+        }
+        // Path compression: re-point every visited label at the root.
+        while (parent[label] !== root) {
+            var next = parent[label];
+            parent[label] = root;
+            label = next;
+        }
+        return root;
+    }
+    function union(a, b) {
+        var rootA = find(a);
+        var rootB = find(b);
+        if (rootA === rootB) {
+            return;
+        }
+        // Keep the smaller label as root so find() yields the minimum.
+        if (rootA < rootB) {
+            parent[rootB] = rootA;
+        }
+        else {
+            parent[rootA] = rootB;
+        }
+    }
+    // First pass: assign provisional labels and record equivalences.
+    for (var row = 0; row < height; row++) {
+        for (var col = 0; col < width; col++) {
+            var idx = row * width + col;
+            if (binaryImage[idx] === 0) {
+                continue;
+            }
+            var up = row > 0 ? labels[idx - width] : 0;
+            var left = col > 0 ? labels[idx - 1] : 0;
+            if (up === 0 && left === 0) {
+                // No labeled neighbor: start a new component.
+                labels[idx] = parent.length;
+                parent.push(parent.length);
+            }
+            else if (up === 0 || left === 0) {
+                labels[idx] = up === 0 ? left : up;
+            }
+            else {
+                // Numeric minimum; a default sort() would compare as strings.
+                labels[idx] = Math.min(up, left);
+                union(up, left);
+            }
+        }
+    }
+    // Second pass: replace each provisional label by the root of its set.
+    for (var i = 0; i < labels.length; i++) {
+        if (labels[i] !== 0) {
+            labels[i] = find(labels[i]);
+        }
+    }
+    return labels;
+}
+exports.connectedComponentLabeling = connectedComponentLabeling;
+/**
+ * Computes the bounding box and pixel count of every labeled region.
+ *
+ * @param {ArrayLike<number>} labels Row-major label per pixel; values > 0
+ *   identify regions.
+ * @param {number} width Pixels per row.
+ * @param {number} height Number of rows.
+ * @returns {Object} Maps each label to { minRow, minCol, maxRow, maxCol, area };
+ *   all four bounds are inclusive and area is the region's pixel count.
+ */
+function computeBounds(labels, width, height) {
+    var bounds = {};
+    for (var row = 0; row < height; row++) {
+        for (var col = 0; col < width; col++) {
+            var idx = row * width + col;
+            var label = labels[idx];
+            if (label > 0) {
+                if (!bounds[label]) {
+                    bounds[label] = { minRow: row, minCol: col, maxRow: row, maxCol: col, area: 1 };
+                }
+                else {
+                    if (row < bounds[label].minRow)
+                        bounds[label].minRow = row;
+                    if (col < bounds[label].minCol)
+                        bounds[label].minCol = col;
+                    if (row > bounds[label].maxRow)
+                        bounds[label].maxRow = row;
+                    if (col > bounds[label].maxCol)
+                        bounds[label].maxCol = col;
+                    ++bounds[label].area;
+                }
+            }
+        }
+    }
+    return bounds;
+}
+exports.computeBounds = computeBounds;
diff --git a/src/charset.json b/src/charset.json
index 38ad4b8..1797133 100644
--- a/src/charset.json
+++ b/src/charset.json
@@ -1,26 +1,3 @@
 {
-  "charset": [
-    "",
-    "0",
-    "2",
-    "4",
-    "8",
-    "A",
-    "D",
-    "G",
-    "H",
-    "J",
-    "K",
-    "M",
-    "N",
-    "P",
-    "Q",
-    "R",
-    "S",
-    "T",
-    "V",
-    "W",
-    "X",
-    "Y"
-  ]
+
 }
diff --git a/src/main.js b/src/main.js
index 0f449f5..eda3087 100644
--- a/src/main.js
+++ b/src/main.js
@@ -1,8 +1,9 @@
 import * as tf from '@tensorflow/tfjs'
 import { setWasmPaths } from '@tensorflow/tfjs-backend-wasm'
-import charsetJSON from './charset.json'
 import modelJSON from './model.json'
+import * as ccl from './ccl'
 
+const charset = [' ', '0', '2', '4', '5', '8', 'A', 'D', 'G', 'H', 'J', 'K', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W', 'X', 'Y']
 let weightsData
 let model
 
@@ -159,26 +160,7 @@ function imageFromCanvas (img, bg, off) {
 
   const adf = 1 / 3
 
-  const draw = function (off, adj) {
-    if (adj) {
-      // stretching might cause interpolation that throws off the model, might need to clean up
-      if (bg) {
-        const border = 4
-        ctx.drawImage(
-          bg,
-          /* sx */ -off + border,
-          /* sy */ 0,
-          /* sw */w - border * 2,
-          /* sh */h,
-          /* dx */-w / 2 + border,
-          /* dy */-h / 2 - (h * (adf * 0.5)),
-          /* dw */w - border * 2,
-          /* dh */h * (1 + adf)
-        )
-      }
-      ctx.drawImage(img, -w / 2, -h / 2 - (h * (adf * 0.5)), w, h * (1 + adf))
-      return
-    }
+  const draw = function (off) {
     if (bg) {
       const border = 4
       ctx.drawImage(
@@ -212,7 +194,7 @@ function imageFromCanvas (img, bg, off) {
 
       if (disorder < bestDisorder) {
         bestDisorder = disorder
-        draw(off, true)
+        draw(off)
         imgdata = ctx.getImageData(0, 0, canvas.width, canvas.height)
         bestImagedata = imgdata
         bestOff = off
@@ -228,7 +210,7 @@ function imageFromCanvas (img, bg, off) {
       slider.value = -bestOff * 2
       bg.style.backgroundPositionX = bestOff + 'px'
     }, 1)
-    draw(bestOff, true)
+    draw(bestOff)
     return bestImagedata
   } else {
     draw(off)
@@ -254,34 +236,51 @@ async function predict (img, bg, off) {
     model = await load()
   }
   const image = imageFromCanvas(img, bg, off)
+  // ImageData holds four bytes (RGBA) per pixel, but the labeling expects one
+  // entry per pixel, so threshold one channel into a width * height mask.
+  // NOTE(review): red channel with the patch's > 128 cut-off; confirm polarity.
+  const mask = new Uint8Array(image.width * image.height)
+  for (let i = 0; i < mask.length; ++i) {
+    mask[i] = image.data[i * 4] > 128 ? 1 : 0
+  }
+  const labels = ccl.connectedComponentLabeling(mask, image.width, image.height)
+  const props = ccl.computeBounds(labels, image.width, image.height)
 
-  for (let i = 0; i < image.data.length; i += 4) {
-    if (image.data[i + 0] ||
-      image.data[i + 1] ||
-      image.data[i + 2]) {
-      image.data[i + 0] = image.data[i + 1] = image.data[i + 2] = 238
+  const sortedByArea = Object.entries(props).sort((a, b) => a[1].area - b[1].area)
+  const eightBiggest = sortedByArea.slice(-8)
+  // NOTE(review): 80 x 300 is the tensor shape the patch feeds the model.
+  const inputHeight = 80
+  const inputWidth = 300
+  const filtered = new Float32Array(inputHeight * inputWidth)
+
+  // TODO: maybe centering?
+  for (const [key, region] of eightBiggest) {
+    if ((region.maxRow - region.minRow) <= 20) {
+      continue
+    }
+
+    // Object.entries yields string keys, while labels holds numbers.
+    const label = Number(key)
+    // Region bounds are inclusive; clamp them to the model input.
+    const lastRow = Math.min(region.maxRow, inputHeight - 1)
+    const lastCol = Math.min(region.maxCol, inputWidth - 1)
+    for (let y = region.minRow; y <= lastRow; ++y) {
+      for (let x = region.minCol; x <= lastCol; ++x) {
+        if (labels[y * image.width + x] === label) {
+          filtered[y * inputWidth + x] = 1
+        }
+      }
     }
   }
 
-  const tensor = tf.browser
-    .fromPixels(image, 1)
-    .mul(-1 / 238)
-    .add(1)
-
-  // the image is rotated 90 degrees because it makes
-  // the pixels read by each invocation contiguous in memory, increasing performance
-  // model thus reads image left to right, and because of
-  // some conv layers, the total width ends up divided by 4
-  // for each line read, it emits 22 predictions, one for each captcha character,
-  // including an "empty" token if nothing probable was found
+  const tensor = tf.tensor3d(filtered, [inputHeight, inputWidth, 1], 'float32')
   const prediction = await model.predict(tensor.expandDims(0)).data()
-  // since it's read from left to right, the results are also written from left to right
-  // the solution is the sequence of most probable non-empty character from left to right
+
   return createSequence(prediction)
 }
 
 function createSequence (prediction) {
-  const csl = charsetJSON.charset.length
+  const csl = charset.length
   const sequence = []
 
   // for each prediction
@@ -294,7 +293,7 @@ function createSequence (prediction) {
 
     for (let i = 0; i < csl; i++) {
       const p = preds[i] / max // normalize probability
-      const c = charsetJSON.charset[i + 1]
+      const c = charset[i + 1]
 
       if (p >= 0.05) { // if it's probable enough
         seqElem[c || ''] = p // save its probability, to give alternative solutions
@@ -308,7 +307,7 @@ function createSequence (prediction) {
 }
 
 function postprocess (sequence, overrides) {
-  const csl = charsetJSON.charset.length
+  const csl = charset.length
   let possibilities = [{ sequence: [] }]
 
   sequence.forEach(function (e, i) {