You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
887 lines
259 KiB
887 lines
259 KiB
10 months ago
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "mjAScbd2vl9P"
|
||
|
},
|
||
|
"source": [
|
||
|
"# OCR model for reading Captchas\n",
|
||
|
"\n",
|
||
|
"**Author:** [A_K_Nain](https://twitter.com/A_K_Nain)<br>\n",
|
||
|
"**Date created:** 2020/06/14<br>\n",
|
||
|
"**Last modified:** 2020/06/26<br>\n",
|
||
|
"**Description:** How to implement an OCR model using CNNs, RNNs and CTC loss."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "wWvlZPBJvl9U"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Introduction\n",
|
||
|
"\n",
|
||
|
"This example demonstrates a simple OCR model built with the Functional API. Apart from\n",
|
||
|
"combining CNN and RNN, it also illustrates how you can instantiate a new layer\n",
|
||
|
"and use it as an \"Endpoint layer\" for implementing CTC loss. For a detailed\n",
|
||
|
"guide to layer subclassing, please check out\n",
|
||
|
"[this page](https://keras.io/guides/making_new_layers_and_models_via_subclassing/)\n",
|
||
|
"in the developer guides."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "Yq0Pe4Zuvl9U"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Setup"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {
|
||
|
"id": "5q-xCl8Qvl9V"
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"2023-08-08 08:50:40.669196: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
||
|
"To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import os\n",
|
||
|
"import numpy as np\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"import sys\n",
|
||
|
"\n",
|
||
|
"from pathlib import Path\n",
|
||
|
"from collections import Counter\n",
|
||
|
"\n",
|
||
|
"import tensorflow as tf\n",
|
||
|
"from tensorflow import keras\n",
|
||
|
"from tensorflow.keras import layers\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"True"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"tf.executing_eagerly()\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "sSm7N--8vl9W"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Load the data: [Captcha Images](https://www.kaggle.com/fournierp/captcha-version-2-images)\n",
|
||
|
"Let's download the data."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {
|
||
|
"id": "g3EVJfHBvl9X"
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Number of images found: 10056\n",
|
||
|
"Number of labels found: 10056\n",
|
||
|
"Number of unique characters: 22\n",
|
||
|
"Characters present: [' ', '0', '2', '4', '5', '8', 'A', 'D', 'G', 'H', 'J', 'K', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W', 'X', 'Y']\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"substitutions = {\n",
|
||
|
" 'B': '8',\n",
|
||
|
" 'F': 'P',\n",
|
||
|
" 'U': 'V',\n",
|
||
|
" '6': 'G',\n",
|
||
|
" 'Z': '2',\n",
|
||
|
" 'O': '0'\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"def apply_substitutions(input_string):\n",
|
||
|
" output_string = \"\"\n",
|
||
|
" for char in input_string:\n",
|
||
|
" if char in substitutions:\n",
|
||
|
" output_string += substitutions[char]\n",
|
||
|
" else:\n",
|
||
|
" output_string += char\n",
|
||
|
" \n",
|
||
|
" return output_string\n",
|
||
|
" \n",
|
||
|
"data_dir = Path(\"./captcha_images_v2/\")\n",
|
||
|
"vdata_dir = Path(\"./validation/\")\n",
|
||
|
"cdir = Path(\"./cache/\")\n",
|
||
|
"\n",
|
||
|
"# Get list of all the images\n",
|
||
|
"images = sorted(list(map(str, list(data_dir.glob(\"*.png\")))))\n",
|
||
|
"labels = [apply_substitutions(img.split(os.path.sep)[-1].split(\".png\")[0]) for img in images]\n",
|
||
|
"\n",
|
||
|
"images_v = sorted(list(map(str, list(vdata_dir.glob(\"*.png\")))))\n",
|
||
|
"labels_v = [img.split(os.path.sep)[-1].split(\".png\")[0] for img in images_v]\n",
|
||
|
"\n",
|
||
|
"# Maximum length of any captcha in the dataset\n",
|
||
|
"max_length = max([len(label) for label in labels])\n",
|
||
|
"labels = [x + ' ' * (max_length - len(x)) for x in labels]\n",
|
||
|
"labels_v = [x + ' ' * (max_length - len(x)) for x in labels_v]\n",
|
||
|
"\n",
|
||
|
"characters = set(char for label in labels for char in label)\n",
|
||
|
"characters = sorted(list(characters))\n",
|
||
|
"\n",
|
||
|
"print(\"Number of images found: \", len(images))\n",
|
||
|
"print(\"Number of labels found: \", len(labels))\n",
|
||
|
"print(\"Number of unique characters: \", len(characters))\n",
|
||
|
"print(\"Characters present: \", characters)\n",
|
||
|
"\n",
|
||
|
"# Batch size for training and validation\n",
|
||
|
"batch_size = 4\n",
|
||
|
"\n",
|
||
|
"# Desired image dimensions\n",
|
||
|
"img_width = 300\n",
|
||
|
"img_height = 80\n",
|
||
|
"\n",
|
||
|
"# Factor by which the image is going to be downsampled\n",
|
||
|
"# by the convolutional blocks. We will be using two\n",
|
||
|
"# convolution blocks and each block will have\n",
|
||
|
"# a pooling layer which downsample the features by a factor of 2.\n",
|
||
|
"# Hence total downsampling factor would be 4.\n",
|
||
|
"downsample_factor = 4\n",
|
||
|
"\n",
|
||
|
"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "gqn-NjRovl9Y"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Preprocessing"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {
|
||
|
"id": "MjQltH0Mvl9Y"
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"2023-08-08 08:57:18.156048: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.442949: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.443125: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.446150: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.446487: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.446778: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.532929: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.533550: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.533758: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355\n",
|
||
|
"2023-08-08 08:57:18.533855: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2216 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5\n",
|
||
|
"2023-08-08 08:57:18.534141: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from skimage.morphology import opening, square, label\n",
|
||
|
"from skimage.measure import regionprops\n",
|
||
|
"from skimage.io import imread, imsave\n",
|
||
|
"from skimage import img_as_ubyte\n",
|
||
|
"\n",
|
||
|
"def crop_and_fill(image, crop_size, fill_value):\n",
|
||
|
" # Create a new image with the desired crop size and fill color\n",
|
||
|
" cropped_image = np.full(crop_size, fill_value, dtype=image.dtype) \n",
|
||
|
" original_size = image.shape\n",
|
||
|
" cropped_image[0:original_size[0], 0:original_size[1]] = image\n",
|
||
|
"\n",
|
||
|
" return cropped_image.reshape(cropped_image.shape[0], cropped_image.shape[1], 1)\n",
|
||
|
" \n",
|
||
|
"# Mapping characters to integers\n",
|
||
|
"char_to_num = layers.StringLookup(\n",
|
||
|
" vocabulary=list(characters), mask_token=None,\n",
|
||
|
")\n",
|
||
|
"\n",
|
||
|
"# Mapping integers back to original characters\n",
|
||
|
"num_to_char = layers.StringLookup(\n",
|
||
|
" vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True\n",
|
||
|
")\n",
|
||
|
"\n",
|
||
|
"def filter_image(img, kernel_size=3, num_components=8, min_height_ratio=0.25, max_height_ratio=1):\n",
|
||
|
" # Binarize the image\n",
|
||
|
" binary_image = img < 0.5 # Pixels with a value less than 0.5 will be True (1)\n",
|
||
|
"\n",
|
||
|
" # Label connected components in the image\n",
|
||
|
" label_image = label(binary_image)\n",
|
||
|
"\n",
|
||
|
" # Get properties of the labeled regions\n",
|
||
|
" properties = regionprops(label_image)\n",
|
||
|
"\n",
|
||
|
" # Sort the regions by area (in descending order)\n",
|
||
|
" properties.sort(key=lambda x: x.area, reverse=True)\n",
|
||
|
"\n",
|
||
|
" # Create an empty image to store the result\n",
|
||
|
" filtered_image = np.zeros_like(label_image, dtype=bool)\n",
|
||
|
"\n",
|
||
|
" # Keep only the largest components that satisfy the height constraints\n",
|
||
|
" for prop in properties[:num_components]:\n",
|
||
|
" minr, minc, maxr, maxc = prop.bbox\n",
|
||
|
" height = maxr - minr\n",
|
||
|
" if height > max_height_ratio * img.shape[0] or height < min_height_ratio * img.shape[0]:\n",
|
||
|
" continue\n",
|
||
|
" filtered_image[label_image == prop.label] = 1\n",
|
||
|
"\n",
|
||
|
" return filtered_image == 0\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"def read_and_process(imgpath, cdir):\n",
|
||
|
" img = imread(imgpath, as_gray=True);\n",
|
||
|
" img = np.hstack([img, np.ones((img_height, img_width - img.shape[1]))]).astype(\"float32\")\n",
|
||
|
" img = filter_image(img)\n",
|
||
|
" output_path = os.path.join(cdir, Path(imgpath).stem + \".png\")\n",
|
||
|
" imsave(output_path, np.clip(img_as_ubyte(img), 0, 238))\n",
|
||
|
" return tf.convert_to_tensor((1 - img).astype(\"float32\").reshape((80, 300, 1)));\n",
|
||
|
"\n",
|
||
|
"def load_data(images, labels, cache):\n",
|
||
|
" os.makedirs(cache, exist_ok=True)\n",
|
||
|
" # 1. Get the total size of the dataset\n",
|
||
|
" size = len(images)\n",
|
||
|
" # 2. Make an indices array and shuffle it, if required\n",
|
||
|
" indices = np.arange(size)\n",
|
||
|
" np.random.shuffle(indices)\n",
|
||
|
" # 3. Get the size of training samples\n",
|
||
|
" train_samples = int(size)\n",
|
||
|
" # 4. Split data into training and validation sets\n",
|
||
|
" x_train, y_train = images[indices], labels[indices]\n",
|
||
|
" x_train = [read_and_process(x, cache) for x in x_train]\n",
|
||
|
" return x_train, y_train\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"# Splitting data into training and validation sets\n",
|
||
|
"rx_train, ry_train = load_data(np.array(images), np.array(labels), Path(\"sdir\"))\n",
|
||
|
"\n",
|
||
|
"def pad_image(image, target_height, target_width, pad_value):\n",
|
||
|
" # Compute the padding sizes\n",
|
||
|
" height, width = tf.shape(image)[0], tf.shape(image)[1]\n",
|
||
|
" pad_height = target_height - height\n",
|
||
|
" pad_width = target_width - width\n",
|
||
|
"\n",
|
||
|
" # Check if padding is needed\n",
|
||
|
" if pad_height < 0:\n",
|
||
|
" pad_height = 0;\n",
|
||
|
" \n",
|
||
|
" if pad_width < 0:\n",
|
||
|
" pad_width = 0;\n",
|
||
|
"\n",
|
||
|
" # Pad the image\n",
|
||
|
" padded_image = tf.pad(image, [[pad_height // 2, pad_height - pad_height // 2], \n",
|
||
|
" [pad_width // 2, pad_width - pad_width // 2], \n",
|
||
|
" [0, 0]], constant_values=pad_value)\n",
|
||
|
" return padded_image\n",
|
||
|
"\n",
|
||
|
"def encode_single_sample(img, label):\n",
|
||
|
" img = tf.image.convert_image_dtype(img, tf.float32)\n",
|
||
|
" # 4. Resize to the desired size\n",
|
||
|
" #img = tf.image.resize_with_pad(img, img_height, img_width)\n",
|
||
|
" # 5. Transpose the image because we want the time\n",
|
||
|
" # dimension to correspond to the width of the image.\n",
|
||
|
" img = tf.transpose(img, perm=[1, 0, 2])\n",
|
||
|
" # 6. Map the characters in label to numbers\n",
|
||
|
" label = char_to_num(tf.strings.unicode_split(label, input_encoding=\"UTF-8\"))\n",
|
||
|
" # 7. Return a dict as our model is expecting two inputs\n",
|
||
|
" return {\"image\": img, \"label\": label}\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "fnwhurZ-vl9Z"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Create `Dataset` objects"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"2514\n",
|
||
|
"7542\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"split_index = int(len(rx_train) * 0.75)\n",
|
||
|
"\n",
|
||
|
"# Move the first 75% of x_valid to x_train\n",
|
||
|
"x_train = rx_train[:split_index];\n",
|
||
|
"# Move the first 75% of y_valid to y_train\n",
|
||
|
"y_train = ry_train[:split_index];\n",
|
||
|
"\n",
|
||
|
"# Keep only the last 25% of x_valid\n",
|
||
|
"x_valid = rx_train[split_index:]\n",
|
||
|
"# Keep only the last 25% of y_valid\n",
|
||
|
"y_valid = ry_train[split_index:]\n",
|
||
|
"\n",
|
||
|
"print(len(x_valid))\n",
|
||
|
"print(len(x_train))"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 11,
|
||
|
"metadata": {
|
||
|
"id": "k2MZdcpXvl9Z"
|
||
|
},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))\n",
|
||
|
"train_dataset = (\n",
|
||
|
" train_dataset.map(\n",
|
||
|
" encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE\n",
|
||
|
" )\n",
|
||
|
" .batch(batch_size)\n",
|
||
|
" .prefetch(buffer_size=tf.data.AUTOTUNE)\n",
|
||
|
")\n",
|
||
|
"\n",
|
||
|
"validation_dataset = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))\n",
|
||
|
"validation_dataset = (\n",
|
||
|
" validation_dataset.map(\n",
|
||
|
" encode_single_sample, num_parallel_calls=tf.data.AUTOTUNE\n",
|
||
|
" )\n",
|
||
|
" .batch(batch_size)\n",
|
||
|
" .prefetch(buffer_size=tf.data.AUTOTUNE)\n",
|
||
|
")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "NI0NRV5Ivl9Z"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Visualize the data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 12,
|
||
|
"metadata": {
|
||
|
"id": "7GT5RSNgvl9Z"
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0oAAAGtCAYAAAAoBIT+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAACHbUlEQVR4nO3deVwV9fc/8Ne9wAUUQUVFUASXhNRygRTc0DSXSs0003LNPqktbvUpSdPUUktzzSUT11JTcf1mJSWo5a5o5QKmomQggrKIAgLn94e/Ox/u3AuXy3rJ1/PxeD8eMPc9M+fOnXNnzszcGY2ICIiIiIiIiEihLe8AiIiIiIiIrA0LJSIiIiIiIhUWSkRERERERCoslIiIiIiIiFRYKBEREREREamwUCIiIiIiIlJhoURERERERKTCQomIiIiIiEiFhRIREREREZEKCyUrcezYMfTt2xf16tWDvb093NzcEBgYiHfffRe3bt2CTqfDwIED8x0/NTUVlSpVQu/evU2+vmrVKmg0Gjg5OSnDrl69iipVqqBfv34mx9m4cSM0Gg2++uorAMDw4cMNxgeATp06QaPRoEePHkbjx8TEQKPRYN68eWbfP5Ep27Ztg0ajwXfffWf0WvPmzaHRaPDTTz8ZvdawYUNotVrY2tri3XffNTntWbNmQaPR4Mcff0ROTg7mz5+PHj16oG7duqhUqRIef/xxTJo0CcnJyco4ixYtUsbJz9dffw2NRoPt27cD+F+O6JuDgwOaNGmCTz75BFlZWQbjMmfIEmvXroVGo8HJkycNhv/6668YNGiQsj2pXLkymjZtinfffRcXL15U+r399tvQaDSIj483GP/27dvQarWws7PD3bt3DV77+++/odFoMHHiRAD/W2c1Gg0+/vhjk3G+9tprSh8ARdqm5fdeExMT4e/vDycnJ4SFhRWwtOhRpV93NBoNIiIijF4XETRq1AgajQadOnXC1q1bodFosGTJEpPTe+ONN2Bvb4+zZ89Co9Fg0qRJ+c770qVL0Gg0GDt2rMHwF198ERqNBm+//bbJ8SIiIpSY165da7LP008/DY1GA29vb2VYVFQUKlWqhFdeecWo/507d1CnTh20adMGOTk5+cZMKkLl7v/+7/9Eq9XK008/LZs2bZKIiAjZtGmTvPvuu1KnTh0REenXr5/Y29vL7du3TU7jq6++EgCyc+dOo9f+/vtvcXFxEQ8PD6lcubLBaytXrhQA8u233xoMj4uLk+rVq0v37t2VYcOGDTMaPygoSAAIAPnll18MXrt69aoAkLlz5xZ+YRDlcevWLdFoNDJq1CiD4UlJSaLRaKRy5crywQcfGLwWGxsrAGTixIny4YcfilarlUOHDhn0+eOPP0Sn0ynTTUtLkypVqsgbb7whW7dulfDwcPniiy+kWrVq0qRJE7l3756IiCQmJoq9vb289NJL+cYcGBgoNWvWlKysLBF5mCMNGjSQI0eOyJEjR2T37t3Su3dvASD/+c9/DMZlzpAl1qxZIwDkxIkTyrDJkycLAAkMDJSvvvpK9u/fL/v27ZN58+bJE088IQAkOztbRES2bdsmAGTTpk0G092+fbvY2dmJnZ2d/PDDDwavrV+/XgDI7t27ReR/62yVKlXEy8tLcnJyDPqnpaWJk5OTODs7S95dDku3aabea2xsrPj6+kq1atXkyJEjli4+ekTo150qVarI4MGDjV4PDw9XXg8KChIRkVdeeUUqVaokly5dMuj7008/CQCZPXu2iIj4+fmJu7u7klNqwcHBAkDOnDmjDLt586bY2dkJAKlatarcv3+/wJjat29v9PqVK1dEo9GIs7OzeHl5Gby2YMECASDbtm0zGD5w4EBxcHCQCxcumIyVTGOhZAU6duwoDRs2lAcPHhi9pt/o7N27VwDIkiVLTE6jTZs24ubmZnIazz//vPTq1ctkoSMi0rNnT6levbr8888/yrDevXtLtWrV5O+//1aG5VcoNW7cWBo0aCB+fn6Sm5urvMadPioJTzzxhPj4+BgM0+/IjR07Vlq3bm3wmn5Hbs+ePZKZmSnNmzeXhg0bSnp6uoiIPHjwQPz8/KRBgwaSlpYmIiLZ2dmSmJhoNO+tW7cKANmwYYMybMCAAaLT6Uz2v3DhggCQd999VxkWFBQkTZs2Nej34MEDeeyxx0Sn0xlsJJkzZAl18bBx40YBIKNHjzb4LtbLzc2VL7/8UtmpS0xMNHkgYuzYsdK2bVsJDAyU999/3+C11157TbRarSQnJ4vI/9bZ119/XQDIvn37DPqvWrVKHB0dZfDgwQaFkqXbNPV7jY6Olnr16om7u7v8/vvvhV5m9OjRrzuvv/66ODo6SkpKisHrgwcPlsDAQGnatKlSKN2+fVs8PDykXbt2yn5YSkqKeHp6SmBgoJJDy5YtU7Y3atnZ2VKnTh3x8/MzGD537lwBIM8995zJA9Ui/yuU9HkVHR1t8PqUKVOkbt260rNnT6NCKTc3Vzp27Cg1a9aUmzdviojIli1bBIB88cUXhV9wJCIivPTOCiQlJaFGjRqwtbU1ek2rffgRde/eHXXr1sWaNWuM+ly4cAHHjh3D0KFDjabxzTff4MCBA1i2bFm+8w8JCQHw8HQyAGzYsAG7d+/Gl19+iTp16piN387ODp9++ilOnTpl8hIpouLo3LkzoqKiEBcXpwyLiIjAU089hWeffRanTp1CWlqawWs2Njbo0KEDdDod1q9fj9jYWHzwwQcAgNmzZyMyMhJr165VLiW1sbGBq6ur0bxbt24NAIiNjVWGjRw5EllZWdi4caNRf31+vvbaawW+J1tbW7Ro0QJZWVkGl/YRFccnn3yCGjVqYMGCBcplbnlpNBq89dZbsLGxAQC4urriiSeeMLocKSIiAp06dUJQUBDCw8ONXmvVqhVcXFwMhvv4+KBt27ZYvXq1wfDVq1fjxRdfNOpf1G0aAJw5cwbt27eHra0tfv31VzzxxBP5LxSi/2/QoEEAgE2bNinDUlJSEBoaavSdXa1aNYSEhOC3337DggULAAATJkxAUlIS1q1bp+TQK6+8AkdHR5Pr8b59+3Djxg2jaa9evRpubm5Yt24dHB0djXImr2eeeQaenp4GfXJzc7Fu3ToMGzZM2UfMS6PRYM2aNbh37x5Gjx6N+Ph4jBkzBh06dMD48ePNLCVSY6FkBQIDA3Hs2DGMHTsWx44dw4MHD4z6aLVaDB8+HKdPn8bZs2cNXstv5ywhIQHjx4/HnDlzULdu3Xzn7+7ujqVLl+L//u//MHv2bIwbNw79+vUzeY1rfl5++WX4+flhypQpJuMnKqrOnTsDgMHOXHh4OIKCgtCuXTtoNBocOnTI4LW8O3JPPvkkpk+fjqVLl2LRokWYOXMmJk6ciA4dOpid9/79+wEATZs2VYZ17doVXl5eRhu3nJwcbNiwAQEBAWjSpInZaV+9ehVVq1ZFzZo1zfYlMueff/7B+fPn8cwzz8DBwaHQ46kPRCQlJeGPP/5AUFAQgoKCcPr0aaSmpgJ4eMDgypUrSk6qjRw5Ejt37sSdO3cAPPy9xOHDhzFy5EijvkXZpgEPf3/VqVMn1KpVC7/++isaNGhQ6PdKjzZnZ2f079/f4Lt706ZN0Gq1ePnll4369+jRA6NGjcKUKVOwYMECrF69Gp9//jkee+wxpY+Liwv69euHPXv24NatWwbjr1mzBg4ODgb7UocPH8aFCxcwdOhQuLq6ol+/fti/fz+uXr1qMmZ9nqxfv175XdG+ffvw999/Y8SIEfm+1wYNGmDu3LnYsWMH2rdvj4yMDKxZs8ZkYUUF4xKzAnPmzEH79u2xZMkSBAQEoHLlymjXrh3mzJlj8ENa/Q9i8yZ5dnY2NmzYgHbt2sHX19dgum+++SZ8fHwwZswYszEMHDgQAwYMwIcffgidTocVK1ZY9B40Gg0+++wzXL58Wbn5A1FJCAoKglarVQqlpKQk/PnnnwgKCoKTkxNatWqlHPWOjY3F1atXjXbk/vvf/yIgIADjx49H48aN8cknn5id740bNzBp0iT4+/vj+eefV4brN1xnzpxBZGSkMvyHH35
|
||
|
"text/plain": [
|
||
|
"<Figure size 1000x500 with 16 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"\n",
|
||
|
"_, ax = plt.subplots(4, 4, figsize=(10, 5))\n",
|
||
|
"for batch in train_dataset.take(1):\n",
|
||
|
" images2 = batch[\"image\"]\n",
|
||
|
" labels2 = batch[\"label\"]\n",
|
||
|
" for i in range(batch_size):\n",
|
||
|
" img = (images2[i] * 255).numpy().astype(\"uint8\")\n",
|
||
|
" label = tf.strings.reduce_join(num_to_char(labels2[i])).numpy().decode(\"utf-8\")\n",
|
||
|
" ax[i // 4, i % 4].imshow(img[:, :, 0].T, cmap=\"gray\", vmin=0, vmax=255)\n",
|
||
|
" ax[i // 4, i % 4].set_title(label)\n",
|
||
|
" ax[i // 4, i % 4].axis(\"off\")\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "5pgP4jIIvl9a"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Model"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 13,
|
||
|
"metadata": {
|
||
|
"id": "ddaZyWUFvl9a",
|
||
|
"scrolled": true
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Model: \"ocr_model_v1\"\n",
|
||
|
"__________________________________________________________________________________________________\n",
|
||
|
" Layer (type) Output Shape Param # Connected to \n",
|
||
|
"==================================================================================================\n",
|
||
|
" image (InputLayer) [(None, 300, 80, 1)] 0 [] \n",
|
||
|
" \n",
|
||
|
" conv_2d1 (Conv2D) (None, 300, 80, 60) 600 ['image[0][0]'] \n",
|
||
|
" \n",
|
||
|
" pool5 (MaxPooling2D) (None, 150, 40, 60) 0 ['conv_2d1[0][0]'] \n",
|
||
|
" \n",
|
||
|
" reshape (Reshape) (None, 75, 4800) 0 ['pool5[0][0]'] \n",
|
||
|
" \n",
|
||
|
" bidirectional (Bidirection (None, 75, 200) 3920800 ['reshape[0][0]'] \n",
|
||
|
" al) \n",
|
||
|
" \n",
|
||
|
" label (InputLayer) [(None, None)] 0 [] \n",
|
||
|
" \n",
|
||
|
" dense2 (Dense) (None, 75, 24) 4824 ['bidirectional[0][0]'] \n",
|
||
|
" \n",
|
||
|
" ctc_loss (CTCLayer) (None, 75, 24) 0 ['label[0][0]', \n",
|
||
|
" 'dense2[0][0]'] \n",
|
||
|
" \n",
|
||
|
"==================================================================================================\n",
|
||
|
"Total params: 3926224 (14.98 MB)\n",
|
||
|
"Trainable params: 3926224 (14.98 MB)\n",
|
||
|
"Non-trainable params: 0 (0.00 Byte)\n",
|
||
|
"__________________________________________________________________________________________________\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"\n",
|
||
|
"class CTCLayer(layers.Layer):\n",
|
||
|
" def __init__(self, name=None):\n",
|
||
|
" super().__init__(name=name)\n",
|
||
|
" self.loss_fn = keras.backend.ctc_batch_cost\n",
|
||
|
"\n",
|
||
|
" def call(self, y_true, y_pred):\n",
|
||
|
" # Compute the training-time loss value and add it\n",
|
||
|
" # to the layer using `self.add_loss()`.\n",
|
||
|
" batch_len = tf.cast(tf.shape(y_true)[0], dtype=\"int64\")\n",
|
||
|
" input_length = tf.cast(tf.shape(y_pred)[1], dtype=\"int64\")\n",
|
||
|
" label_length = tf.cast(tf.shape(y_true)[1], dtype=\"int64\")\n",
|
||
|
"\n",
|
||
|
" input_length = input_length * tf.ones(shape=(batch_len, 1), dtype=\"int64\")\n",
|
||
|
" label_length = label_length * tf.ones(shape=(batch_len, 1), dtype=\"int64\")\n",
|
||
|
"\n",
|
||
|
" loss = self.loss_fn(y_true, y_pred, input_length, label_length)\n",
|
||
|
" self.add_loss(loss)\n",
|
||
|
"\n",
|
||
|
" # At test time, just return the computed predictions\n",
|
||
|
" return y_pred\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"def build_model():\n",
|
||
|
" # Inputs to the model\n",
|
||
|
" input_img = layers.Input(\n",
|
||
|
" shape=(img_width, img_height, 1), name=\"image\", dtype=\"float32\"\n",
|
||
|
" )\n",
|
||
|
" labels = layers.Input(name=\"label\", shape=(None,), dtype=\"float32\")\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
" # First conv block\n",
|
||
|
" x = layers.Conv2D(40, (3, 3), activation=\"relu\", kernel_initializer=\"GlorotUniform\", padding=\"same\",use_bias=True, name=\"conv_2d\")(input_img) \n",
|
||
|
" x = layers.MaxPooling2D((2, 2), name=\"pool3\")(x)\n",
|
||
|
" x = layers.Conv2D(60, (3, 3), activation=\"relu\", kernel_initializer=\"GlorotUniform\", padding=\"same\",use_bias=True, name=\"conv_2d1\")(input_img) \n",
|
||
|
" x = layers.MaxPooling2D((2, 2), name=\"pool5\")(x)\n",
|
||
|
"\n",
|
||
|
" # Reshape layer to prepare for the RNN layers\n",
|
||
|
" new_shape = ((img_width // 4), (img_height // 4) * 60 * 4)\n",
|
||
|
" #new_shape = (36, 256)\n",
|
||
|
" x = layers.Reshape(target_shape=new_shape, name=\"reshape\")(x)\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
" # RNN layers\n",
|
||
|
" x = layers.Bidirectional(layers.LSTM(100, recurrent_activation=\"sigmoid\", activation=\"tanh\",kernel_initializer=\"GlorotUniform\", recurrent_initializer=\"Orthogonal\", return_sequences=True, dropout=0.3), merge_mode=\"concat\")(x)\n",
|
||
|
" \n",
|
||
|
" # Output layer\n",
|
||
|
" x = layers.Dense(len(char_to_num.get_vocabulary()) + 1, activation=\"softmax\", name=\"dense2\")(x)\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
" # Add CTC layer for calculating CTC loss at each step\n",
|
||
|
" output = CTCLayer(name=\"ctc_loss\")(labels, x)\n",
|
||
|
"\n",
|
||
|
" # Define the model\n",
|
||
|
" model = keras.models.Model(\n",
|
||
|
" inputs=[input_img, labels], outputs=output, name=\"ocr_model_v1\"\n",
|
||
|
" )\n",
|
||
|
" # Optimizer\n",
|
||
|
" opt = keras.optimizers.Adam(learning_rate=0.0075)\n",
|
||
|
" # Compile the model and return\n",
|
||
|
" model.compile(optimizer=opt)\n",
|
||
|
" return model\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"# Get the model\n",
|
||
|
"model = build_model()\n",
|
||
|
"model.summary()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "PCAmf-fzvl9a"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Training"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 14,
|
||
|
"metadata": {
|
||
|
"id": "M-R6QGjuvl9a"
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Epoch 1/100\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"2023-08-08 09:00:00.538999: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8902\n",
|
||
|
"2023-08-08 09:00:01.417354: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f9eb707a6f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
||
|
"2023-08-08 09:00:01.417398: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5\n",
|
||
|
"2023-08-08 09:00:01.480666: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
|
||
|
"2023-08-08 09:00:01.881648: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"1886/1886 [==============================] - 66s 32ms/step - loss: 10.4818 - val_loss: 3.5385\n",
|
||
|
"Epoch 2/100\n",
|
||
|
"1886/1886 [==============================] - 59s 31ms/step - loss: 2.9341 - val_loss: 2.3414\n",
|
||
|
"Epoch 3/100\n",
|
||
|
"1886/1886 [==============================] - 57s 30ms/step - loss: 2.1140 - val_loss: 1.9919\n",
|
||
|
"Epoch 4/100\n",
|
||
|
"1886/1886 [==============================] - 58s 31ms/step - loss: 1.7638 - val_loss: 1.7001\n",
|
||
|
"Epoch 5/100\n",
|
||
|
"1886/1886 [==============================] - 68s 36ms/step - loss: 1.5672 - val_loss: 1.7731\n",
|
||
|
"Epoch 6/100\n",
|
||
|
"1886/1886 [==============================] - 69s 37ms/step - loss: 1.5323 - val_loss: 1.9233\n",
|
||
|
"Epoch 7/100\n",
|
||
|
"1886/1886 [==============================] - 67s 36ms/step - loss: 1.4012 - val_loss: 1.6993\n",
|
||
|
"Epoch 8/100\n",
|
||
|
"1886/1886 [==============================] - 72s 38ms/step - loss: 1.4151 - val_loss: 2.1529\n",
|
||
|
"Epoch 9/100\n",
|
||
|
"1886/1886 [==============================] - 67s 35ms/step - loss: 1.3961 - val_loss: 1.6122\n",
|
||
|
"Epoch 10/100\n",
|
||
|
"1886/1886 [==============================] - 63s 34ms/step - loss: 1.3398 - val_loss: 1.7357\n",
|
||
|
"Epoch 11/100\n",
|
||
|
"1886/1886 [==============================] - 66s 35ms/step - loss: 1.2908 - val_loss: 1.6795\n",
|
||
|
"Epoch 12/100\n",
|
||
|
"1886/1886 [==============================] - 65s 34ms/step - loss: 1.2644 - val_loss: 1.5135\n",
|
||
|
"Epoch 13/100\n",
|
||
|
"1886/1886 [==============================] - 67s 36ms/step - loss: 1.2279 - val_loss: 1.6908\n",
|
||
|
"Epoch 14/100\n",
|
||
|
"1886/1886 [==============================] - 68s 36ms/step - loss: 1.2833 - val_loss: 1.6330\n",
|
||
|
"Epoch 15/100\n",
|
||
|
"1886/1886 [==============================] - 64s 34ms/step - loss: 1.3665 - val_loss: 1.7188\n",
|
||
|
"Epoch 16/100\n",
|
||
|
"1886/1886 [==============================] - 59s 31ms/step - loss: 1.2802 - val_loss: 1.4348\n",
|
||
|
"Epoch 17/100\n",
|
||
|
"1886/1886 [==============================] - 66s 35ms/step - loss: 1.2824 - val_loss: 1.5345\n",
|
||
|
"Epoch 18/100\n",
|
||
|
"1886/1886 [==============================] - 57s 30ms/step - loss: 1.2963 - val_loss: 1.4366\n",
|
||
|
"Epoch 19/100\n",
|
||
|
"1886/1886 [==============================] - 61s 32ms/step - loss: 1.2542 - val_loss: 1.4457\n",
|
||
|
"Epoch 20/100\n",
|
||
|
"1886/1886 [==============================] - 61s 33ms/step - loss: 1.2377 - val_loss: 1.4475\n",
|
||
|
"Epoch 21/100\n",
|
||
|
"1886/1886 [==============================] - 58s 31ms/step - loss: 1.2494 - val_loss: 1.5095\n",
|
||
|
"Epoch 22/100\n",
|
||
|
"1886/1886 [==============================] - 58s 31ms/step - loss: 1.2469 - val_loss: 1.5823\n",
|
||
|
"Epoch 23/100\n",
|
||
|
"1886/1886 [==============================] - 63s 33ms/step - loss: 1.2601 - val_loss: 1.5059\n",
|
||
|
"Epoch 24/100\n",
|
||
|
"1886/1886 [==============================] - 68s 36ms/step - loss: 1.3095 - val_loss: 1.5356\n",
|
||
|
"Epoch 25/100\n",
|
||
|
"1886/1886 [==============================] - 67s 35ms/step - loss: 1.6499 - val_loss: 1.7413\n",
|
||
|
"Epoch 26/100\n",
|
||
|
"1886/1886 [==============================] - 68s 36ms/step - loss: 1.4921 - val_loss: 1.7528\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"epochs = 100\n",
|
||
|
"early_stopping_patience = 10\n",
|
||
|
"# Add early stopping\n",
|
||
|
"early_stopping = keras.callbacks.EarlyStopping(\n",
|
||
|
" monitor=\"val_loss\", patience=early_stopping_patience, restore_best_weights=True\n",
|
||
|
")\n",
|
||
|
"\n",
|
||
|
"# Train the model\n",
|
||
|
"history = model.fit(\n",
|
||
|
" train_dataset,\n",
|
||
|
" validation_data=validation_dataset,\n",
|
||
|
" epochs=epochs,\n",
|
||
|
" callbacks=[early_stopping],\n",
|
||
|
")\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {
|
||
|
"id": "1EpbnVEEvl9a"
|
||
|
},
|
||
|
"source": [
|
||
|
"## Inference\n",
|
||
|
"\n",
|
||
|
"You can use the trained model hosted on [Hugging Face Hub](https://huggingface.co/keras-io/ocr-for-captcha)\n",
|
||
|
"and try the demo on [Hugging Face Spaces](https://huggingface.co/spaces/keras-io/ocr-for-captcha)."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"metadata": {
|
||
|
"id": "8_xv3ktTvl9b"
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Model: \"model\"\n",
|
||
|
"_________________________________________________________________\n",
|
||
|
" Layer (type) Output Shape Param # \n",
|
||
|
"=================================================================\n",
|
||
|
" image (InputLayer) [(None, 300, 80, 1)] 0 \n",
|
||
|
" \n",
|
||
|
" conv_2d1 (Conv2D) (None, 300, 80, 60) 600 \n",
|
||
|
" \n",
|
||
|
" pool5 (MaxPooling2D) (None, 150, 40, 60) 0 \n",
|
||
|
" \n",
|
||
|
" reshape (Reshape) (None, 75, 4800) 0 \n",
|
||
|
" \n",
|
||
|
" bidirectional (Bidirection (None, 75, 200) 3920800 \n",
|
||
|
" al) \n",
|
||
|
" \n",
|
||
|
" dense2 (Dense) (None, 75, 24) 4824 \n",
|
||
|
" \n",
|
||
|
"=================================================================\n",
|
||
|
"Total params: 3926224 (14.98 MB)\n",
|
||
|
"Trainable params: 3926224 (14.98 MB)\n",
|
||
|
"Non-trainable params: 0 (0.00 Byte)\n",
|
||
|
"_________________________________________________________________\n",
|
||
|
"1/1 [==============================] - 1s 696ms/step\n",
|
||
|
"HSXVW \n",
|
||
|
"28P0VK\n",
|
||
|
"VMTPM \n",
|
||
|
"HKYJA2\n",
|
||
|
"['HSXVW ', 'HSXVW ']\n",
|
||
|
"['28P0VK', '28P0VK']\n",
|
||
|
"['VMTPM ', 'VMTPM ']\n",
|
||
|
"['HKYJA2', 'HKYJA2']\n",
|
||
|
"1/1 [==============================] - 0s 32ms/step\n",
|
||
|
"YPDVAK\n",
|
||
|
"Y0S4RR\n",
|
||
|
"H4DRJ \n",
|
||
|
"NJTNKP\n",
|
||
|
"['YPDVAK', 'YPDVAK']\n",
|
||
|
"['0S4RR ', 'Y0S4RR']\n",
|
||
|
"['H4DR ', 'H4DRJ ']\n",
|
||
|
"['NJTNKP', 'NJTNKP']\n",
|
||
|
"1/1 [==============================] - 0s 53ms/step\n",
|
||
|
"088S4 \n",
|
||
|
"PVSSSY\n",
|
||
|
"SVK4WA\n",
|
||
|
"ASJKKX\n",
|
||
|
"['088S4 ', '088S4 ']\n",
|
||
|
"['PSSY ', 'PVSSSY']\n",
|
||
|
"['SVK4WA', 'SVK4WA']\n",
|
||
|
"['ASJKKX', 'ASJKKX']\n",
|
||
|
"1/1 [==============================] - 0s 47ms/step\n",
|
||
|
"0RW8HW\n",
|
||
|
"0ANDKK\n",
|
||
|
"R204S0\n",
|
||
|
"4VNHNX\n",
|
||
|
"['0RW8HW', '0RW8HW']\n",
|
||
|
"['0ANDKK', '0ANDKK']\n",
|
||
|
"['R204S0', 'R204S0']\n",
|
||
|
"['VNHNX ', '4VNHNX']\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJ4AAAGZCAYAAADILvv1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd1hUx9fHv7v0KkVFioINUQkiYIsNflgwloC9C2KNPWqiMVGxxNh7jQho7CL2iooae0ElKqgoSkBUqohLWTjvH/vuDcsWdmmLOJ/nOc8Dc+fOnDv33rMz586c4RERgcFgMBgMBoPBYDAYDAaDwShj+OpWgMFgMBgMBoPBYDAYDAaDUTVhjicGg8FgMBgMBoPBYDAYDEa5wBxPDAaDwWAwGAwGg8FgMBiMcoE5nhgMBoPBYDAYDAaDwWAwGOUCczwxGAwGg8FgMBgMBoPBYDDKBeZ4YjAYDAaDwWAwGAwGg8FglAvM8cRgMBgMBoPBYDAYDAaDwSgXmOOJwWAwGAwGg8FgMBgMBoNRLjDHE4PBYDAYDAaDwWAwGAwGo1xQu+Mp+EEweAE8TjQXaMJmlQ38jvoh4WNCicqMiIsAL4CHQ08OyTw+8dRE8AJ4Eml5+XnYencrWvzZAmZLzaC/WB+2a2zx/b7vEfY0jMu38+FO8AJ42HZvm1S51+OvQ2OBBmacm4EPWR+gvVAbAw8NlKvnx5yP0F+sj157e+HQk0PgBfCw/5/9UvmabWkGXgAPZ1+clTpWf119uGx1kVuHLAq3tyKJiItQqcyJpybKPCa+tqLlnX1xFl12dYHVSivoLNKB1UoruAe744+//+DyxCTHQH+xPgaHDpYqN02QButV1mi1vRXyC/LRfGtzWK+yRn5Bvlw92+5oi+rLquPdp3fgB/Ax/sR4qTxTTk8BL4CH2eGzpY75H/WHxgINpAnS5NahiKuvr0JnkQ5ep7+WSL//9j467ewEw98NYfKHCXrv742XaS8l8jxLeQbthdq4//Z+iepmlC/lYcvuJd7DhJMT8M3mb2C0xAgWKyzQaWcnXHx1UWb+0CehaLujLcyWmsHkDxO0/LMldj3cJZWv6Lte7Y9qcA92x8lnJ6Xyhr8MR5vANtBfrI/qy6rD94gv3me9545POzMNvAAeopOj5V7HnAtzwAvgcc+u3Ro79NjTQyrf9vvbobFAA7329kK2MFvqeNE2lid2a+zk6lKUi68uYuTRkXDY4ACD3w1gvcoa3+/7HvcS70nlJSL8ee9PuG5zhfESY5gvM0fH4I5S7RaXHiehDz+AD/Nl5vhu93e4EX9Dqtx9/+yD8xZn6C7ShdVKK0w9MxWfcj9xx332+0BvsR7Ss9PlXseQw0OgtVAL7z69AyDfJs+7NA+8AB7GnxiPAipQ2Dbp2emovqw69v2zTyL9fdZ7+B7xRfVl1aG/WB9tAtvgwssLEnny8vNQf119rLm5RmEdDPVS1narpPaAF8CDe7C7zPzifpe4H1H0/VIkcelxXJ9QLBoLNGCxwgL9DvbD0w9PuXoKlzs/Yr5MXUYeHcnlURXxdRYnwQ+Ciy3rYdJD8AJ4mBU+S26e5ynPwQvgYfLpyQCA+RHzOXtUtH8BAFm5WTBeYgxeAA++R3wBAO7B7krpLG6votdo+LshWm1vhZ0Pd0rUJS633tp6ICIpXa68vqJSe8iD2bCvj8oyrnQPdofjJkepvKeen+Keubpr68JujR0yczKl8r1IfQGD3w0wKHSQxHXdTbwrUweXrS7gBfCw4voKmcdV6esURlV7qwziMuXpuuL6CqnyfI/4wvB3Q6m8dxLuoPqy6rBfbw+PEA+Y/GGC+Ix4qXypglRYrrRE2x1tUUAFxd7T3vt7Kxzbqto/l8eCywvQZGMTqf5Ycf1CAAi8HwjrVdbIys1SqU51onbHk5ig74Nww/8Gzg87j9Euo7E3ai/aB7WvsMYcFjYMk05PgoedB/7q/ReODzqOX9v/Ck2+Js7G/ufwGd5sOL5v9D2mn5su8UJk5WZhxJERsDe3x6L/LUINgxro1agXjkQfkeuk2PfPPgiEAvg394e7nTt44OFS3CWJPKmCVES9i4KBloHUsX8//ouXaS/hYeeh0rXe8L8hId81/A56mnpS6S6Wqjm0VGHL3S3w2u0FYx1jbPhuA84OPYulnZaicfXGEkagUfVG+N3zd+z9Zy9Cn4RKlPHDqR+QKkhFiHcINPga8G/uj8TMRIn7VZhnKc9wPf46hjkNg4WhBZrWbCrVpgAQ8TpCZnuLjznXcoapnqnK10xEmHp2Kka7jIatiS2XHp0cDfdgd+Tm5+JAvwPY8f0OPEt5hvZB7fEh6wOXz97cHkOchmDa2Wkq182oOMrSlu39Zy9uJ97GSOeRODrwKLb33A4dTR147vSU6sjviNyBvgf7wtLQErt778a+vvtQ36w+hh8ZjtU3VkuV3bdJX9zwv4FrI69h43cbkfQpCT339pRwolyOu4xuu7vBwsACRwcexVqvtQh/GQ7PnZ7IEeYAAPxd/Ln6ZVFABdj5aCecazkrtCnLry3H6OOjMeSbITg84DB0NXWl8nRv2F3KThW+FrGEDQiTOlcem+9uRlx6HKa0moJTg09hrddavM96j9aBraU6EPMi5mHMiTFoadUSof1DEfx9MHQ0dNBjbw8cfnpYquxJLSfhhv8NXPW7iiWeS/Dw3UN4hHgg8m0kl2f3o90YFDoILaxa4PSQ05jXcR6CHwSj9/7eXB7/5v7IFmZjT9QemdeQkZ2BsKdh6GHfAxaGFjLzEBEmn56MBVcWYFbbWdjcYzP4PMVdgICIAFgZWWFA0wFcWo4wB547PXHh1QWs9VqLowOPwsLAAl67vXA57jKXT0tDC3M7zMWCywuQ8jlFYT0M9VNWdquk9sBI2whXXl9BbGqs1Dk7InfAWMeY+9/S0FLKDjSv1Rz1TOtJpVsaWnLn/f6/33HD/wYujbiEn9v+jPOx59F2R1upwaiRthGCHwRLDQQ+5X7CwScHJXRRhbABYRK6+TcXtdWZIWck0rs37F5sWc1qNYOrpSt2Ptwp92Nb0IMgAODqEWOobYigyCCp/AefHEReQR60+Fpc2qbumyR0+7X9r6Ky//95Ecsol1HcOW1rt+XSg72DwQMPI46MwOY7myXqM9I2wqv0VzIHakXveUlhNuzrRd3jSlnsjdoL733eaFunLcKHheP4oONI+pSE6eemS+QroAL4HfVDNZ1q2PjdxmLLfZD0AJFJon5FYGSgzDyq9HUKU1J7WxFcenUJnjs9Ubtabfw98m/s7bMXmnxNjDo+SirvxFMTkZmTiRDvkGL7Pu+z3uPEsxMAgN1Ru2V+CFWlfy6PxMxELLu2DAs8FkjopEy/EABGOI+AgZYBll1bplR9lQJSM0GRQYT5oDsJdyTSf7v4G2E+6K+Hf6lc5qVXlwjzQQcfH5R5fMLJCYT5/136y9SXhPmguRfnysyfX5Av8X9SZhKZLzUn92B3KigoICKi8SfGk0aABt369xaX79SzU4T5oPW31ssst9WfrchiuQXl5ecREdE3m76hRusbSeQ5/OQwaS3QosmnJlPLP1tKHNv5YCdhPuh4zHGZ5SvLiLARZLDYoFRlYD5owskJMo8dfHyQMB906dUlLq3O6jrUIaiDzPxF27ugoIA6BHWgGstq0LtP74iI6MA/BwjzQSuvr+TypX5OJd1FutRnfx+Z5f58/mfCfNCjpEdERDTp1CTCfNDbzLdcnpTPKcSbz6MZZ2eQ5gJN+pj9kTsWnxFPmA+afna6gpaQj/h5iP4QLZHe70A/qr6sOmVkZ3BpcWlxpLVAi34695NE3rsJdwnzQdfeXCuRDozyozxsmfh5L4wwX0hOm52o/tr6EultA9uS7WpbifenoKCAHDY4kNNmJ4m8st7XFykvCPNBnXZ24tJabGtBTTY24WwUEdG1N9cI80Gbbm/i0lr+2ZJqraglkU/M6eenpeyg7Wpb6r67O/f/7PDZhPmgSacmcTZVWRTZHmWQ1caZOZlksdyCPEM8JdKtV1pTux3tJNIEeQKqtqQa9drbi0t7lfaKMB+
|
||
|
"text/plain": [
|
||
|
"<Figure size 1500x500 with 16 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from sklearn.metrics import pairwise_distances\n",
|
||
|
"\n",
|
||
|
"# Get the prediction model by extracting layers till the output layer\n",
|
||
|
"prediction_model = keras.models.Model(\n",
|
||
|
" model.get_layer(name=\"image\").input, model.get_layer(name=\"dense2\").output\n",
|
||
|
")\n",
|
||
|
"prediction_model.summary()\n",
|
||
|
"\n",
|
||
|
"# A utility function to decode the output of the network\n",
|
||
|
"def decode_batch_predictions(pred):\n",
|
||
|
" input_len = np.ones(pred.shape[0]) * pred.shape[1]\n",
|
||
|
" # Use greedy search. For complex tasks, you can use beam search\n",
|
||
|
" results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][\n",
|
||
|
" :, :max_length\n",
|
||
|
" ]\n",
|
||
|
" # Iterate over the results and get back the text\n",
|
||
|
" output_text = []\n",
|
||
|
" for res in results:\n",
|
||
|
" res = tf.strings.reduce_join(num_to_char(res)).numpy().decode(\"utf-8\")\n",
|
||
|
" output_text.append(res)\n",
|
||
|
" return output_text\n",
|
||
|
"\n",
|
||
|
"def lev(s1, s2):\n",
|
||
|
" m, n = len(s1), len(s2)\n",
|
||
|
" dp = np.zeros((m + 1, n + 1), dtype=int)\n",
|
||
|
"\n",
|
||
|
" for i in range(m + 1):\n",
|
||
|
" for j in range(n + 1):\n",
|
||
|
" if i == 0:\n",
|
||
|
" dp[i][j] = j\n",
|
||
|
" elif j == 0:\n",
|
||
|
" dp[i][j] = i\n",
|
||
|
" elif s1[i - 1] == s2[j - 1]:\n",
|
||
|
" dp[i][j] = dp[i - 1][j - 1]\n",
|
||
|
" else:\n",
|
||
|
" dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])\n",
|
||
|
"\n",
|
||
|
" return dp[m][n]\n",
|
||
|
"# Let's check results on some validation samples\n",
|
||
|
"j = 0\n",
|
||
|
"_, ax = plt.subplots(4, 4, figsize=(15, 5))\n",
|
||
|
"\n",
|
||
|
"for batch in validation_dataset.take(4):\n",
|
||
|
" batch_images = batch[\"image\"]\n",
|
||
|
" batch_labels = batch[\"label\"]\n",
|
||
|
"\n",
|
||
|
" preds = prediction_model.predict(batch_images)\n",
|
||
|
" pred_texts = decode_batch_predictions(preds)\n",
|
||
|
"\n",
|
||
|
" orig_texts = []\n",
|
||
|
" for label in batch_labels:\n",
|
||
|
" print(tf.strings.reduce_join(num_to_char(label)).numpy().decode(\"utf-8\"))\n",
|
||
|
" label = tf.strings.reduce_join(num_to_char(label)).numpy().decode(\"utf-8\")\n",
|
||
|
" orig_texts.append(label)\n",
|
||
|
"\n",
|
||
|
" for i in range(len(pred_texts)):\n",
|
||
|
" img = (batch_images[i, :, :, 0] * 255).numpy().astype(np.uint8)\n",
|
||
|
" img = img.T\n",
|
||
|
" pred = pred_texts[i].replace('[UNK]', '')\n",
|
||
|
" comp = orig_texts[i];\n",
|
||
|
" if len(comp) == 5:\n",
|
||
|
" comp += ' '\n",
|
||
|
" if len(pred) == 5:\n",
|
||
|
" pred += ' '\n",
|
||
|
" dist = lev(pred, comp)\n",
|
||
|
" print([pred, comp])\n",
|
||
|
" title = f\"P: {pred} T: {comp} ({dist})\"\n",
|
||
|
" ax[j // 4,i % 4].imshow(img, cmap=\"gray\")\n",
|
||
|
" ax[j // 4, i % 4].set_title(title, color=('green' if comp in pred else 'red'))\n",
|
||
|
" ax[j // 4, i % 4].axis(\"off\")\n",
|
||
|
" j += 1\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 17,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"INFO:tensorflow:Assets written to: captcha_75_25.tf/assets\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"INFO:tensorflow:Assets written to: captcha_75_25.tf/assets\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"model.save('captcha_75_25.h5')\n",
|
||
|
"model.save('captcha_75_25.keras')\n",
|
||
|
"model.save('captcha_75_25.tf')\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"accelerator": "GPU",
|
||
|
"colab": {
|
||
|
"name": "captcha_ocr",
|
||
|
"provenance": [],
|
||
|
"toc_visible": true
|
||
|
},
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.11.3"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 4
|
||
|
}
|