VNTranslator
Become a Patron | Join Our Discord
  • Overview
  • GETTING STARTED
    • Getting Started
    • System Requirements
    • Download & Installation
    • Interface Basics
  • Features
    • Launcher
    • Modules
    • Translation
      • Translation Settings
      • Translation Glossary
      • Extra Options
        • Pre-translation
        • Post-translation
        • Variables
      • Transcheck
      • Advanced Settings
      • Translation Editor
      • Export Translation
      • Import Translation
    • MT Engines
      • MT Settings
      • DeepL API
      • OpenAI Translate (Legacy)
    • Extra Window
    • Hyper Overlay
    • Extensions
      • JParser
      • Jisho
      • Fast OCR
    • Hotkeys
  • User Guide
    • Clipboard
    • OCR
      • Features
        • OCR Screen
        • OCR Master
        • Pre-processing
        • OCR Engines
          • Tesseract OCR
          • Windows OCR
          • Google Cloud Vision
          • Azure Cloud Vision
          • Google Lens
          • Custom - Command Line
          • Custom - HTTP POST
        • Post-processing
      • OCR Engine Installer
      • Understanding OCR and Improving Accuracy
      • OCR Engines Comparison
    • OCR GX 🎮
    • AutoTrans
      • Translation Modes
      • Font Replacement
      • Feature Settings
        • Translation
        • Mods
      • Extract & Translate
      • Steam Connect
      • RTL
      • FAQ
        • How Can I Improve Game Speed in AutoTrans?
        • How to Change Font Type in RenPy?
        • How to Change Font Size in Unity?
    • RenPy Games
    • Tyrano Builder
    • TextractorCLI
  • ADVANCED
    • LLMs
      • System Prompt
      • OpenAI API
      • GeminiAI API
    • Custom MT
      • Schema
      • Form Builder
      • Request & Response
      • Components
      • webLLM
      • MT Kit
      • V1 & V2 (Archive)
    • OCR Server Kit
      • EasyOCR
      • SuryaOCR
    • API Gateway
      • Translate
      • Translation Memory 🚧
    • RegExp
      • Matching
      • Replacement
  • Help
    • FAQ
    • Troubleshooting
      • Launcher
        • VNTranslator appears as a black box
      • Network Connection
      • Machine Translation (MT)
        • Web Scraping Timeout
        • API Error Codes
        • Clear Cookies & Site Data
      • OCR
      • AutoTrans
      • TextractorCLI
    • Glossary
    • Archives
      • Comparison of OCR Version 1.0 and 2.0
      • Workflow Explanation for OCR
      • OCR 1.0 (Archive)
        • Tesseract OCR
        • Screen Capture
        • Post-Capture Actions
    • Credits
Powered by GitBook
On this page
  1. ADVANCED
  2. OCR Server Kit

EasyOCR

vntocr_easyocr.py
# Integration of VNTranslator OCR with the EasyOCR engine
# Version: 1.0
# Author: Fazx - GarudaMods | https://www.patreon.com/vntranslator

"""
# ==================================================================
# EasyOCR: https://github.com/JaidedAI/EasyOCR
# Required: python 3.10+ and PyTorch
# Install with: pip install easyocr
# ==================================================================
# Run this script with: python vntocr_easyocr.py
# In VNTranslator use Custom Engine - HTTP POST with configuration:
# -- URL: http://127.0.0.1:5353
# -- Content type: application/json
# -- Headers: {}
# -- Body: {"image":"$IMAGE_BASE64", "langs": ["ja"]}
# -- Response type: JSON
# -- Response query: fullText
# ==================================================================
# Languages: EasyOCR language codes, mostly two-letter ISO 639 (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes); some differ, e.g. Simplified Chinese = ch_sim
# -- Japanese = ja
# -- English = en
# ==================================================================
"""

from flask import Flask, request, jsonify
from PIL import Image
from io import BytesIO
import base64
import re
import json
import numpy as np
import easyocr

# Server settings, passed to app.run() in the __main__ guard below.
# Host name/address the Flask server binds to.
APP_HOST = "localhost"
# TCP port the server listens on; must match the URL configured in VNTranslator.
APP_PORT = 5353
# Forwarded as Flask's `debug` flag.
APP_DEBUG = True

def parse_ocr_result(easyocr_result):
    """Turn raw EasyOCR detections into the payload returned by the server.

    Every entry of *easyocr_result* is read by index as
    ``(polygon, text, confidence)``, where ``polygon`` is a sequence of
    ``(x, y)`` points. Each polygon is reduced to its axis-aligned
    bounding box.

    Returns a dict with two keys:
      - "fullText": the detected texts joined by single spaces, with
        surrounding whitespace stripped.
      - "lines": one dict per detection holding "text", the box size
        ("w", "h"), its top-left corner ("x", "y") and "confidence".
    """
    detections = []
    texts = []

    for entry in easyocr_result:
        polygon, text, confidence = entry[0], entry[1], entry[2]

        # Split the polygon into coordinate lists, then take the extremes.
        xs = [point[0] for point in polygon]
        ys = [point[1] for point in polygon]
        left, right = int(min(xs)), int(max(xs))
        top, bottom = int(min(ys)), int(max(ys))

        detections.append({
            "text": text,
            "w": right - left,
            "h": bottom - top,
            "x": left,
            "y": top,
            "confidence": float(confidence),
        })
        texts.append(text)

    return {
        "fullText": " ".join(texts).strip(),
        "lines": detections,
    }

def base64_to_numpy(base64_string):
    """Decode a base64-encoded image into a NumPy array.

    Accepts raw base64 text or a data URI (``data:image/png;base64,...``);
    when a comma is present, only the part after it is decoded.

    Args:
        base64_string: Base64 text of an image file that PIL can open.

    Returns:
        The image pixels as a NumPy array. Images whose PIL mode does not
        map directly to grey/RGB(A) pixel values (palette, bilevel, ...)
        are converted to RGB first; other modes are returned unchanged.

    Raises:
        ValueError: If the input is missing, empty, not a string, or cannot
            be decoded as an image. The underlying error is chained as the
            cause.
    """
    if not base64_string:
        raise ValueError("Base64 string is empty or missing")

    # Fail with the documented exception type instead of a TypeError from
    # the `in` test below when the JSON field holds a number, list, etc.
    if not isinstance(base64_string, str):
        raise ValueError("Base64 image must be a string")

    if "," in base64_string:
        base64_string = base64_string.split(",")[1]

    try:
        image_decode = base64.b64decode(base64_string)
        print("Base64 decoding successful")

        # open the image with PIL
        image = Image.open(BytesIO(image_decode))
        print(f"Image format: {image.format}, size: {image.size}")

        # np.array() on these modes does not yield plain pixel intensities
        # (e.g. palette indices for "P", booleans for "1"), so normalise
        # them to RGB before handing the array to the OCR engine.
        if image.mode in ("1", "P", "PA", "LA", "CMYK", "YCbCr"):
            image = image.convert("RGB")

        # convert PIL image to NumPy array
        image_np = np.array(image)
        print(f"Converted image to NumPy array with shape: {image_np.shape}")

        return image_np
    except Exception as e:
        raise ValueError(f"Image decoding failed: {e}") from e

############################################################

# Flask application that serves the OCR endpoint.
app = Flask(__name__)
# Languages the current `reader` was built for; ocr_endpoint() rebuilds the
# reader when a request asks for a different list.
default_langs = ["ja"]
# EasyOCR reader, created once at import time and reused across requests.
reader = easyocr.Reader(default_langs)

@app.route("/", methods=["POST"])
def ocr_endpoint():
    """Handle ``POST /``: run EasyOCR on a base64 image and return JSON.

    Request body (JSON object):
      - "image": base64 image data (required).
      - "langs": list of EasyOCR language codes, or a single code as a
        string (optional, defaults to ``["ja"]``). When it differs from the
        languages of the current reader, a new reader is loaded.
      - "draw_bounding_box": echoed back in the response (optional,
        defaults to ``False``).

    Responses:
      - 200: the dict from ``parse_ocr_result`` plus "draw_bounding_box",
        serialised as UTF-8 JSON (non-ASCII text is not escaped).
      - 400: ``{"error": ...}`` for a non-JSON/invalid body, missing or
        undecodable image, or a model that fails to load.
      - 500: ``{"error": ...}`` for any other failure.
    """
    global default_langs, reader

    try:
        print("\n\n=== OCR Request ===")
        print(f"Method: {request.method}")
        print(f"Headers: {dict(request.headers)}")

        if not request.is_json:
            print("Request is not JSON")
            return jsonify({"error": "Request must be JSON"}), 400

        # silent=True yields None for a malformed body instead of raising,
        # so bad client input is reported as 400 rather than falling through
        # to the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            print("Request body is not a JSON object")
            return jsonify({"error": "Request body must be a JSON object"}), 400

        # log payload
        print(f"Request JSON keys: {list(data.keys())}")

        # check image
        if "image" not in data:
            print("No image data")
            return jsonify({"error": "No image data"}), 400

        # decode base64 image
        try:
            image = base64_to_numpy(data["image"])
        except Exception as e:
            print(f"Image decoding failed: {e}")
            return jsonify({"error": f"Image decoding failed: {str(e)}"}), 400

        # check langs
        langs = data.get("langs", ["ja"])
        if isinstance(langs, str):
            # Accept a single code ("ja") as shorthand for ["ja"].
            langs = [langs]
        try:
            if langs != default_langs:
                # Build the new reader first and only then publish it together
                # with its language list. If loading fails, the previous
                # reader/langs pair stays consistent, so a repeated bad request
                # cannot silently run OCR with the old language.
                new_reader = easyocr.Reader(langs)
                reader, default_langs = new_reader, langs
        except Exception as e:
            print(f"Load model failed: {e}")
            return jsonify({"error": f"Load model failed: {str(e)}"}), 400
        print(f"langs: {langs}")

        # check draw bounding box
        draw_bounding_box = data.get("draw_bounding_box", False)
        print(f"draw_bounding_box: {draw_bounding_box}")

        # run ocr
        # https://github.com/JaidedAI/EasyOCR?tab=readme-ov-file#usage
        result = reader.readtext(image)
        print(f"OCR completed successfully: {result}")

        # parse result
        parsed_result = parse_ocr_result(result)
        parsed_result["draw_bounding_box"] = draw_bounding_box
        json_result = json.dumps(parsed_result, indent=4, ensure_ascii=False)
        # A bare string would be sent as text/html; label it as JSON.
        return json_result, 200, {"Content-Type": "application/json; charset=utf-8"}

    except Exception as e:
        print(f"Error request: {e}")
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    print(f"=== Starting OCR server {APP_HOST} on port {APP_PORT} ===")
    # The EasyOCR reader is created at module import time. With debug on,
    # Flask's auto-reloader re-executes this script in a child process, which
    # would load the OCR model a second time. The reloader is therefore
    # disabled; the debug flag itself still follows APP_DEBUG.
    app.run(debug=APP_DEBUG, host=APP_HOST, port=APP_PORT, use_reloader=False)
Previous: OCR Server Kit | Next: SuryaOCR

Last updated 4 months ago