Browse Source

pre-processing

master
coomdev 9 months ago
parent
commit
7aea2e8114
  1. 78
      src/ccl.js
  2. 25
      src/charset.json
  3. 72
      src/main.js

78
src/ccl.js

@ -0,0 +1,78 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.computeBounds = exports.connectedComponentLabeling = void 0;
/**
 * Labels the 4-connected foreground components of a binary image with the
 * two-pass algorithm (provisional labels + equivalence resolution).
 *
 * A pixel is foreground when its value is not strictly equal to 0. Only the
 * pixel above and the pixel to the left are inspected, so connectivity is
 * 4-way (no diagonals).
 *
 * @param {ArrayLike<number>} binaryImage - Row-major pixel values, one per pixel.
 * @param {number} width - Number of pixels per row.
 * @param {number} height - Number of rows.
 * @returns {number[]} Array of the same length as `binaryImage`; 0 for
 *   background, otherwise the smallest provisional label of the component the
 *   pixel belongs to. Every pixel of one component receives the same label.
 */
function connectedComponentLabeling(binaryImage, width, height) {
    const labels = Array(binaryImage.length).fill(0);
    // Union-find forest over provisional labels. parent[l] === l marks a root.
    // Index 0 is the background and is never merged.
    const parent = [0];
    let nextLabel = 1;

    // Returns the representative label of the set containing `label`,
    // compressing the path so later lookups are short.
    function findRoot(label) {
        let root = label;
        while (parent[root] !== root) {
            root = parent[root];
        }
        while (parent[label] !== root) {
            const next = parent[label];
            parent[label] = root;
            label = next;
        }
        return root;
    }

    // Merges the sets of `a` and `b`. The numerically smaller root always
    // wins, so a set's root is the smallest label it contains.
    function union(a, b) {
        const rootA = findRoot(a);
        const rootB = findRoot(b);
        if (rootA === rootB) {
            return rootA;
        }
        const low = Math.min(rootA, rootB);
        const high = Math.max(rootA, rootB);
        parent[high] = low;
        return low;
    }

    // First pass: assign provisional labels and record equivalences.
    for (let row = 0; row < height; row++) {
        for (let col = 0; col < width; col++) {
            const idx = row * width + col;
            if (binaryImage[idx] === 0) {
                continue;
            }
            const up = row > 0 ? labels[idx - width] : 0;
            const left = col > 0 ? labels[idx - 1] : 0;
            if (up > 0 && left > 0) {
                // Both neighbours are labelled: they belong to one component.
                labels[idx] = union(up, left);
            }
            else if (up > 0) {
                labels[idx] = up;
            }
            else if (left > 0) {
                labels[idx] = left;
            }
            else {
                // No labelled neighbour: start a new provisional component.
                parent[nextLabel] = nextLabel;
                labels[idx] = nextLabel;
                nextLabel++;
            }
        }
    }

    // Second pass: replace every provisional label by its set representative.
    for (let idx = 0; idx < labels.length; idx++) {
        if (labels[idx] > 0) {
            labels[idx] = findRoot(labels[idx]);
        }
    }
    return labels;
}
exports.connectedComponentLabeling = connectedComponentLabeling;
/**
 * Computes the bounding box and pixel count of every labelled region.
 *
 * @param {ArrayLike<number>} labels - Row-major label map; values greater
 *   than 0 identify a region, anything else is ignored.
 * @param {number} width - Number of pixels per row.
 * @param {number} height - Number of rows.
 * @returns {Object<string, {minRow: number, minCol: number, maxRow: number, maxCol: number, area: number}>}
 *   Map from label to its box. `maxRow` and `maxCol` are inclusive (they are
 *   the coordinates of actual pixels); `area` is the number of pixels
 *   carrying that label.
 */
function computeBounds(labels, width, height) {
    const bounds = {};
    for (let y = 0; y < height; y++) {
        const rowStart = y * width;
        for (let x = 0; x < width; x++) {
            const id = labels[rowStart + x];
            // Skip background and anything that is not a positive label.
            if (!(id > 0)) {
                continue;
            }
            const box = bounds[id];
            if (!box) {
                // First pixel seen for this label: the box is that pixel.
                bounds[id] = { minRow: y, minCol: x, maxRow: y, maxCol: x, area: 1 };
                continue;
            }
            box.minRow = Math.min(box.minRow, y);
            box.minCol = Math.min(box.minCol, x);
            box.maxRow = Math.max(box.maxRow, y);
            box.maxCol = Math.max(box.maxCol, x);
            box.area += 1;
        }
    }
    return bounds;
}
exports.computeBounds = computeBounds;

25
src/charset.json

@ -1,26 +1,3 @@
{
"charset": [
"",
"0",
"2",
"4",
"8",
"A",
"D",
"G",
"H",
"J",
"K",
"M",
"N",
"P",
"Q",
"R",
"S",
"T",
"V",
"W",
"X",
"Y"
]
}

72
src/main.js

@ -1,8 +1,9 @@
import * as tf from '@tensorflow/tfjs'
import { setWasmPaths } from '@tensorflow/tfjs-backend-wasm'
import charsetJSON from './charset.json'
import modelJSON from './model.json'
import ccl from './ccl'
const charset = [' ', '0', '2', '4', '5', '8', 'A', 'D', 'G', 'H', 'J', 'K', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W', 'X', 'Y']
let weightsData
let model
@ -159,26 +160,7 @@ function imageFromCanvas (img, bg, off) {
const adf = 1 / 3
const draw = function (off, adj) {
if (adj) {
// stretching might cause interpolation that throws off the model, might need to clean up
if (bg) {
const border = 4
ctx.drawImage(
bg,
/* sx */ -off + border,
/* sy */ 0,
/* sw */w - border * 2,
/* sh */h,
/* dx */-w / 2 + border,
/* dy */-h / 2 - (h * (adf * 0.5)),
/* dw */w - border * 2,
/* dh */h * (1 + adf)
)
}
ctx.drawImage(img, -w / 2, -h / 2 - (h * (adf * 0.5)), w, h * (1 + adf))
return
}
const draw = function (off) {
if (bg) {
const border = 4
ctx.drawImage(
@ -212,7 +194,7 @@ function imageFromCanvas (img, bg, off) {
if (disorder < bestDisorder) {
bestDisorder = disorder
draw(off, true)
draw(off)
imgdata = ctx.getImageData(0, 0, canvas.width, canvas.height)
bestImagedata = imgdata
bestOff = off
@ -228,7 +210,7 @@ function imageFromCanvas (img, bg, off) {
slider.value = -bestOff * 2
bg.style.backgroundPositionX = bestOff + 'px'
}, 1)
draw(bestOff, true)
draw(bestOff)
return bestImagedata
} else {
draw(off)
@ -254,34 +236,36 @@ async function predict (img, bg, off) {
model = await load()
}
const image = imageFromCanvas(img, bg, off)
const labels = ccl.connectedComponentLabeling(image.data.map(e => +(e > 128)), image.width, image.height)
const props = ccl.computeBounds(labels, image.width, image.height)
for (let i = 0; i < image.data.length; i += 4) {
if (image.data[i + 0] ||
image.data[i + 1] ||
image.data[i + 2]) {
image.data[i + 0] = image.data[i + 1] = image.data[i + 2] = 238
const sortedByArea = Object.entries(props).sort((a, b) => a[1].area - b[1].area)
const eightBiggest = sortedByArea.slice(-8)
const filtered = new Float32Array(80 * 300)
// TODO: maybe centering?
// Copy the pixels of each retained region into the `filtered` buffer.
for (const [key, region] of eightBiggest) {
  // Skip regions whose row span is 20 or less.
  if ((region.maxRow - region.minRow) <= 20) {
    continue
  }
  // Object.entries yields keys as strings, while `labels` holds numbers;
  // convert once so the strict comparison below can match.
  const label = Number(key)
  // maxRow / maxCol are inclusive (they are coordinates of real pixels),
  // so both loops must include the upper bound.
  for (let y = region.minRow; y <= region.maxRow; ++y) {
    // The loop condition must test x; testing y here either never runs
    // or never terminates.
    for (let x = region.minCol; x <= region.maxCol; ++x) {
      if (labels[y * image.width + x] === label) {
        // NOTE(review): the destination stride is hard-coded to 300 while
        // the source stride is image.width — confirm they always agree.
        filtered[y * 300 + x] = 1
      }
    }
  }
}
const tensor = tf.browser
.fromPixels(image, 1)
.mul(-1 / 238)
.add(1)
// the image is rotated 90 degrees because it makes
// the pixels read by each invocation contiguous in memory, increasing performance
// model thus reads image left to right, and because of
// some conv layers, the total width ends up divided by 4
// for each line read, it emits 22 predictions, one for each captcha character,
// including an "empty" token if nothing probable was found
const tensor = tf.tensor3d(filtered, [80, 300, 1], 'float32')
const prediction = await model.predict(tensor.expandDims(0)).data()
// since it's read from left to right, the results are also written from left to right
// the solution is the sequence of most probable non-empty character from left to right
return createSequence(prediction)
}
function createSequence (prediction) {
const csl = charsetJSON.charset.length
const csl = charset.length
const sequence = []
// for each prediction
@ -294,7 +278,7 @@ function createSequence (prediction) {
for (let i = 0; i < csl; i++) {
const p = preds[i] / max // normalize probability
const c = charsetJSON.charset[i + 1]
const c = charset[i + 1]
if (p >= 0.05) { // if it's probable enough
seqElem[c || ''] = p // save its probability, to give alternative solutions
@ -308,7 +292,7 @@ function createSequence (prediction) {
}
function postprocess (sequence, overrides) {
const csl = charsetJSON.charset.length
const csl = charset.length
let possibilities = [{ sequence: [] }]
sequence.forEach(function (e, i) {

Loading…
Cancel
Save