# EasyOCR

{% code title="vntocr\_easyocr.py" %}

```python
# Integration of VNTranslator OCR with the EasyOCR engine
# Version: 1.0
# Author: Fazx - GarudaMods | https://www.patreon.com/vntranslator

"""
# ==================================================================
# EasyOCR: https://github.com/JaidedAI/EasyOCR
# Requires: Python 3.10+ and PyTorch
# Install with: pip install easyocr
# ==================================================================
# Run this script with: python vntocr_easyocr.py
# In VNTranslator use Custom Engine - HTTP POST with configuration:
# -- URL: http://127.0.0.1:5353
# -- Content type: application/json
# -- Headers: {}
# -- Body: {"image":"$IMAGE_BASE64", "langs": ["ja"]}
# -- Response type: JSON
# -- Response query: fullText
# ==================================================================
# Languages (two-letter ISO) https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes
# -- Japanese = ja
# -- English = en
# ==================================================================
"""

from flask import Flask, request, jsonify
from PIL import Image
from io import BytesIO
import base64
import re
import json
import numpy as np
import easyocr

# Settings for the built-in Flask server; all three are passed to app.run()
# in the __main__ guard at the bottom of this script.
APP_HOST = "localhost"  # hostname/interface the server binds to
APP_PORT = 5353  # must match the port of the URL configured in VNTranslator
APP_DEBUG = True  # forwarded as app.run(debug=...)

def parse_ocr_result(easyocr_result):
    """Turn raw EasyOCR detections into the JSON-ready response payload.

    Every entry of ``easyocr_result`` is read positionally as
    ``(polygon, text, confidence)``, where ``polygon`` is a sequence of
    ``(x, y)`` points. Each polygon is collapsed to its axis-aligned
    bounding box with integer coordinates.

    Returns:
        dict: ``{"fullText": str, "lines": list[dict]}`` where ``fullText``
        is all detected texts joined by single spaces (outer whitespace
        stripped) and each line dict holds ``text``, ``w``, ``h``, ``x``,
        ``y`` and ``confidence``.
    """

    def to_line(detection):
        # Describe one detection by its text, bounding box and score.
        corners, content, score = detection[0], detection[1], detection[2]
        xs = [corner[0] for corner in corners]
        ys = [corner[1] for corner in corners]
        left, top = int(min(xs)), int(min(ys))
        right, bottom = int(max(xs)), int(max(ys))
        return {
            "text": content,
            "w": int(right - left),
            "h": int(bottom - top),
            "x": int(left),
            "y": int(top),
            "confidence": float(score),
        }

    detections = list(easyocr_result)
    boxes = [to_line(detection) for detection in detections]
    joined = " ".join(detection[1] for detection in detections).strip()
    return {"fullText": joined, "lines": boxes}

def base64_to_numpy(base64_string):
    """Decode a base64-encoded image into a NumPy array.

    Accepts either raw base64 or a data URL such as
    ``data:image/png;base64,...``; for a data URL everything up to and
    including the first comma is discarded before decoding.

    Images whose Pillow mode is not ``L``, ``RGB`` or ``RGBA`` (for example
    palette ``P``, bilevel ``1`` or ``LA``) are converted to ``RGB`` first,
    so the returned array holds real pixel intensities instead of palette
    indices, booleans or an unusual channel count.

    Args:
        base64_string: The encoded image as a ``str``.

    Returns:
        numpy.ndarray: The decoded image pixels.

    Raises:
        ValueError: If the input is missing, empty, not a string, or cannot
            be decoded as an image.
    """
    if not base64_string:
        raise ValueError("Base64 string is empty or missing")

    # Reject non-string payloads up front so callers always get ValueError
    # rather than a TypeError from the comma test below.
    if not isinstance(base64_string, str):
        raise ValueError(
            f"Base64 image must be a string, got {type(base64_string).__name__}"
        )

    # Strip a "data:<mime>;base64," prefix if present.
    if "," in base64_string:
        base64_string = base64_string.split(",", 1)[1]

    try:
        image_decode = base64.b64decode(base64_string)
        print("Base64 decoding successful")

        # open the image with PIL
        image = Image.open(BytesIO(image_decode))
        print(f"Image format: {image.format}, size: {image.size}")

        # Normalize exotic modes: np.array() on a palette image would yield
        # palette indices, not intensities.
        if image.mode not in ("L", "RGB", "RGBA"):
            print(f"Converting image mode {image.mode} to RGB")
            image = image.convert("RGB")

        # convert PIL image to NumPy array
        image_np = np.array(image)
        print(f"Converted image to NumPy array with shape: {image_np.shape}")

        return image_np
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise ValueError(f"Image decoding failed: {e}") from e

############################################################

# Flask application that serves the OCR endpoint.
app = Flask(__name__)
# Languages the current `reader` was built for. The request handler compares
# the incoming "langs" value against this and rebuilds the reader on change.
default_langs = ["ja"]
# EasyOCR reader created once at import time and reused across requests.
reader = easyocr.Reader(default_langs)

@app.route("/", methods=["POST"])
def ocr_endpoint():
    """Run OCR on a base64 image posted as JSON and return the result as JSON.

    Expected request body (JSON object):
      - "image" (required): base64 string or data URL of the image.
      - "langs" (optional): list of EasyOCR language codes, default ["ja"].
        A single string is accepted and treated as a one-element list.
      - "draw_bounding_box" (optional): echoed back in the response,
        default False.

    Responses:
      - 200: JSON with "fullText", "lines" and "draw_bounding_box".
      - 400: JSON {"error": ...} for a non-JSON or non-object body, a
        missing or undecodable image, or a language model that fails to load.
      - 500: JSON {"error": ...} for any other failure.

    Side effects: when the requested languages differ from the ones the
    module-level reader was built for, the reader is rebuilt and both
    `reader` and `default_langs` are replaced.
    """
    global default_langs, reader

    try:
        print("\n\n=== OCR Request ===")
        print(f"Method: {request.method}")
        print(f"Headers: {dict(request.headers)}")

        if not request.is_json:
            print("Request is not JSON")
            return jsonify({"error": "Request must be JSON"}), 400

        # silent=True returns None for a malformed body instead of raising,
        # so bad JSON is reported as a 400 rather than falling through to 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            print("Request body is not a JSON object")
            return jsonify({"error": "Request body must be a JSON object"}), 400

        # log payload
        print(f"Request JSON keys: {list(data.keys())}")

        # check image
        if "image" not in data:
            print("No image data")
            return jsonify({"error": "No image data"}), 400

        # decode base64 image
        try:
            image = base64_to_numpy(data["image"])
        except Exception as e:
            print(f"Image decoding failed: {e}")
            return jsonify({"error": f"Image decoding failed: {str(e)}"}), 400

        # check langs
        langs = data.get("langs", ["ja"])
        if isinstance(langs, str):
            langs = [langs]
        if langs != default_langs:
            # Build the new reader BEFORE touching the globals: if loading
            # fails, the previous reader/langs pair must stay in sync so a
            # retry with the same langs is not silently served by the old
            # reader.
            try:
                new_reader = easyocr.Reader(langs)
            except Exception as e:
                print(f"Load model failed: {e}")
                return jsonify({"error": f"Load model failed: {str(e)}"}), 400
            reader = new_reader
            default_langs = langs
        print(f"langs: {langs}")

        # check draw bounding box
        draw_bounding_box = data.get("draw_bounding_box", False)
        print(f"draw_bounding_box: {draw_bounding_box}")

        # run ocr
        # https://github.com/JaidedAI/EasyOCR?tab=readme-ov-file#usage
        result = reader.readtext(image)
        print(f"OCR completed successfully: {result}")

        # parse result
        parsed_result = parse_ocr_result(result)
        parsed_result["draw_bounding_box"] = draw_bounding_box
        # Serialize manually to keep non-ASCII text readable
        # (ensure_ascii=False), but send it with a JSON content type; a bare
        # string would go out as text/html.
        json_result = json.dumps(parsed_result, indent=4, ensure_ascii=False)
        return app.response_class(json_result, mimetype="application/json")

    except Exception as e:
        print(f"Error request: {e}")
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    # Announce the bind address, then hand control to Flask's built-in server.
    banner = f"=== Starting OCR server {APP_HOST} on port {APP_PORT} ==="
    print(banner)
    app.run(host=APP_HOST, port=APP_PORT, debug=APP_DEBUG)

```

{% endcode %}


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://docs.vntranslator.com/advanced/ocr-server-kit/easyocr.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
