# EasyOCR
# 将 VNTranslator OCR 与 EasyOCR 引擎集成
# 版本:1.0
# 作者:Fazx - GarudaMods | https://www.patreon.com/vntranslator
"""
# ==================================================================
# EasyOCR: https://github.com/JaidedAI/EasyOCR
# 要求:python 3.10+ 和 PyTorch
# 安装命令:pip install easyocr
# ==================================================================
# 使用此脚本运行:python vntocr_easyocr.py
# 在 VNTranslator 中使用自定义引擎 - HTTP POST 并配置:
# -- URL: http://127.0.0.1:5353
# -- 内容类型:application/json
# -- 头部:{}
# -- 请求体:{"image":"$IMAGE_BASE64", "langs": ["ja"]}
# -- 响应类型:JSON
# -- 响应查询:fullText
# ==================================================================
# 语言(两字母 ISO) https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes
# -- 日语 = ja
# -- 英语 = en
# ==================================================================
"""
from flask import Flask, request, jsonify
from PIL import Image
from io import BytesIO
import base64
import re
import json
import numpy as np
import easyocr
# Server settings consumed by app.run() in the __main__ guard at the bottom
# of this file.
# Interface/hostname the Flask server binds to.
APP_HOST = "localhost"
# TCP port the server listens on (the client URL in the header uses 5353).
APP_PORT = 5353
# Passed as Flask's debug flag. NOTE(review): Flask debug mode is meant for
# development only - confirm before exposing this server beyond localhost.
APP_DEBUG = True
def parse_ocr_result(easyocr_result):
    """Convert raw EasyOCR detections into the response payload.

    Args:
        easyocr_result: Iterable of detections. Each entry is indexed as
            ``entry[0]`` = polygon (sequence of ``(x, y)`` points),
            ``entry[1]`` = recognised text, ``entry[2]`` = confidence.

    Returns:
        dict: ``{"fullText": str, "lines": list}``. ``fullText`` is every
        detected text joined by single spaces with surrounding whitespace
        stripped. Each item of ``lines`` holds the text, the axis-aligned
        bounding box of its polygon (``x``, ``y``, ``w``, ``h`` as ints)
        and the confidence as a float.

    Raises:
        ValueError: If a polygon contains no points (from ``min``/``max``).
    """
    lines = []
    for entry in easyocr_result:
        polygon, text, confidence = entry[0], entry[1], entry[2]

        # Collect each axis once instead of re-scanning the polygon for
        # every min/max.
        xs = [point[0] for point in polygon]
        ys = [point[1] for point in polygon]

        # int()/float() turn whatever numeric type the detector produced
        # into plain Python numbers so the result is JSON-serialisable.
        x_min, y_min = int(min(xs)), int(min(ys))
        x_max, y_max = int(max(xs)), int(max(ys))

        lines.append({
            "text": text,
            "w": x_max - x_min,
            "h": y_max - y_min,
            "x": x_min,
            "y": y_min,
            "confidence": float(confidence),
        })

    # Build the combined text in one join rather than repeated "+=".
    full_text = " ".join(line["text"] for line in lines).strip()
    return {
        "fullText": full_text,
        "lines": lines,
    }
def base64_to_numpy(base64_string):
    """Decode a base64-encoded image into a NumPy array.

    Args:
        base64_string: The image as base64 text. A prefix ending in a comma
            (as in a ``data:`` URI) is dropped before decoding.

    Returns:
        numpy.ndarray: The decoded pixels. Images whose Pillow mode is not
        ``L``, ``RGB`` or ``RGBA`` are converted to ``RGB`` first.

    Raises:
        ValueError: If the input is empty, is not a string, or cannot be
            decoded/opened as an image. The underlying error is chained.
    """
    if not base64_string:
        raise ValueError("Base64 string is empty or missing")
    if not isinstance(base64_string, str):
        # Without this check the "," test below raises a stray TypeError
        # for non-string JSON values such as numbers.
        raise ValueError(
            f"Base64 string must be a str, got {type(base64_string).__name__}"
        )

    # Keep only the part after the first comma (the payload of a data URI).
    if "," in base64_string:
        base64_string = base64_string.split(",")[1]

    try:
        image_decode = base64.b64decode(base64_string)
        print("Base64 decoding successful")

        # Open the image with PIL.
        image = Image.open(BytesIO(image_decode))
        print(f"Image format: {image.format}, size: {image.size}")

        # Palette ("P"), bilevel ("1") and similar modes do not turn into
        # plain pixel arrays: a "P" image becomes palette indices, not
        # colours. Normalise everything except grey/RGB/RGBA to RGB so the
        # array always holds real pixel values.
        if image.mode not in ("L", "RGB", "RGBA"):
            image = image.convert("RGB")

        # Convert the PIL image to a NumPy array.
        image_np = np.array(image)
        print(f"Converted image to NumPy array with shape: {image_np.shape}")
        return image_np
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise ValueError(f"Image decoding failed: {e}") from e
############################################################
# Flask application object; routes below are registered on it.
app = Flask(__name__)
# Languages the current `reader` was built for. ocr_endpoint compares each
# request's "langs" against this list and rebuilds `reader` when they differ.
default_langs = ["ja"]
# EasyOCR reader, constructed once at import time for the default languages.
reader = easyocr.Reader(default_langs)
@app.route("/", methods=["POST"])
def ocr_endpoint():
    """Run OCR on a base64 image posted as JSON.

    Request body (JSON object):
        image: Base64-encoded image (required).
        langs: List of language codes for EasyOCR (default ``["ja"]``).
        draw_bounding_box: Flag echoed back in the response (default False).

    Returns:
        On success, the JSON produced by ``parse_ocr_result`` plus the
        ``draw_bounding_box`` flag, served as ``application/json``.
        On a bad request (not JSON, not an object, no image, undecodable
        image, model load failure) a JSON ``{"error": ...}`` with status 400.
        On any other failure a JSON ``{"error": ...}`` with status 500.
    """
    global default_langs, reader
    try:
        print("\n\n=== OCR Request ===")
        print(f"Method: {request.method}")
        print(f"Headers: {dict(request.headers)}")

        if not request.is_json:
            print("Request is not JSON")
            return jsonify({"error": "Request must be JSON"}), 400

        # silent=True returns None on malformed JSON instead of raising, so
        # a client error is answered with 400 rather than falling through
        # to the generic 500 handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            print("Request JSON is not an object")
            return jsonify({"error": "Request JSON must be an object"}), 400

        # Log the payload keys.
        print(f"Request JSON keys: {list(data.keys())}")

        # Check for the image.
        if "image" not in data:
            print("No image data")
            return jsonify({"error": "No image data"}), 400

        # Decode the base64 image.
        try:
            image = base64_to_numpy(data["image"])
        except Exception as e:
            print(f"Image decoding failed: {e}")
            return jsonify({"error": f"Image decoding failed: {str(e)}"}), 400

        # Check the languages; rebuild the reader only when they changed.
        langs = data.get("langs", ["ja"])
        try:
            if langs != default_langs:
                # Build the new reader BEFORE touching the globals: if
                # loading fails, the old reader/language pair stays
                # consistent and a retry will attempt the load again.
                new_reader = easyocr.Reader(langs)
                reader = new_reader
                default_langs = langs
        except Exception as e:
            print(f"Load model failed: {e}")
            return jsonify({"error": f"Load model failed: {str(e)}"}), 400
        print(f"langs: {langs}")

        # Read the bounding-box flag; it is only echoed in the response.
        draw_bounding_box = data.get("draw_bounding_box", False)
        print(f"draw_bounding_box: {draw_bounding_box}")

        # Run OCR.
        # https://github.com/JaidedAI/EasyOCR?tab=readme-ov-file#usage
        result = reader.readtext(image)
        print(f"OCR completed successfully: {result}")

        # Parse the result.
        parsed_result = parse_ocr_result(result)
        parsed_result["draw_bounding_box"] = draw_bounding_box

        # ensure_ascii=False keeps non-ASCII text readable in the body.
        # The explicit header labels the body as JSON; a bare string would
        # be served as text/html.
        json_result = json.dumps(parsed_result, indent=4, ensure_ascii=False)
        return json_result, 200, {"Content-Type": "application/json; charset=utf-8"}
    except Exception as e:
        # Last-resort boundary: report unexpected failures as JSON.
        print(f"Error request: {e}")
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # Start Flask's built-in server only when run as a script, not on import.
    server_options = {"debug": APP_DEBUG, "host": APP_HOST, "port": APP_PORT}
    print(f"=== Starting OCR server {APP_HOST} on port {APP_PORT} ===")
    app.run(**server_options)