# SuryaOCR
# Integracja OCR VNTranslator z silnikiem SuryaOCR
# Wersja: 1.0
# Autor: Fazx - GarudaMods | https://www.patreon.com/vntranslator
"""
# ==================================================================
# Surya OCR: https://github.com/VikParuchuri/surya
# Wymagane: python 3.10+ i PyTorch
# Zainstaluj za pomocą: pip install surya-ocr
# ==================================================================
# Uruchom ten skrypt za pomocą: python vntocr_suryaocr.py
# W VNTranslator użyj Custom Engine - HTTP POST z konfiguracją:
# -- URL: http://127.0.0.1:5353
# -- Typ treści: application/json
# -- Nagłówki: {}
# -- Body: {"image":"$IMAGE_BASE64", "langs": ["ja"]}
# -- Typ odpowiedzi: JSON
# -- Zapytanie odpowiedzi: fullText
# ==================================================================
# Języki (dwuliterowe ISO) https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes
# -- Japoński = ja
# -- Angielski = en
# ==================================================================
"""
from flask import Flask, request, jsonify
from PIL import Image
from io import BytesIO
import base64
import re
import json
from surya.ocr import run_ocr
from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.recognition.processor import load_processor as load_rec_processor
# Host name the Flask development server binds to (passed as host= to app.run).
APP_HOST = "localhost"
# TCP port the server listens on (passed as port= to app.run); the URL
# configured in the VNTranslator custom engine must use the same port.
APP_PORT = 5353
# Passed as debug= to app.run.
APP_DEBUG = True
def format_ocr_result(ocr_result):
    """Flatten parsed OCR pages into the response payload.

    Args:
        ocr_result: iterable of dicts, each with a "text_boxes" list whose
            items carry "text" and "bbox" (indexed as [x1, y1, x2, y2]).

    Returns:
        A dict with "fullText" (the box texts joined by single spaces, in
        sorted order, stripped) and "boxes" (one dict per text box with
        "text", "w", "h", "x", "y", sorted by y and then x).
    """

    def _as_box(line):
        # Turn the corner-based bbox into origin + size.
        coords = line["bbox"]
        left, top = coords[0], coords[1]
        width, height = coords[2] - left, coords[3] - top
        return {
            "text": line["text"],
            "w": width,
            "h": height,
            "x": left,
            "y": top,
        }

    # Order boxes top-to-bottom, then left-to-right (stable sort).
    ordered = sorted(
        (_as_box(line) for page in ocr_result for line in page["text_boxes"]),
        key=lambda item: (item["y"], item["x"]),
    )
    joined = " ".join(item["text"] for item in ordered).strip()
    return {"fullText": joined, "boxes": ordered}
def parse_ocr_result(ocr_result):
    """Convert OCR result objects to plain dicts and format them.

    Args:
        ocr_result: list of result objects exposing ``text_lines``,
            ``languages`` and ``image_bbox``; each text line exposes
            ``polygon``, ``confidence``, ``text`` and ``bbox``.

    Returns:
        Whatever ``format_ocr_result`` returns for the converted pages.

    Raises:
        ValueError: if ``ocr_result`` is not a list.
    """
    if not isinstance(ocr_result, list):
        raise ValueError("ocr_result is not a list")

    # Copy the attributes into dictionaries keyed the way the formatter reads them.
    pages = [
        {
            "text_boxes": [
                {
                    "polygon": entry.polygon,
                    "confidence": entry.confidence,
                    "text": entry.text,
                    "bbox": entry.bbox,
                }
                for entry in page.text_lines
            ],
            "languages": page.languages,
            "image_bbox": page.image_bbox,
        }
        for page in ocr_result
    ]
    return format_ocr_result(pages)
############################################################
# Flask application object; the OCR route below is registered on it.
app = Flask(__name__)
# Load the Surya detection and recognition processors/models once, at import
# time; the request handler passes these module-level objects to run_ocr.
det_processor = load_det_processor()
det_model = load_det_model()
rec_model = load_rec_model()
rec_processor = load_rec_processor()
@app.route("/", methods=["POST"])
def ocr_endpoint():
    """Run Surya OCR on a base64-encoded image posted as JSON.

    Request body (JSON object):
        image: base64-encoded image data (required).
        langs: list of language codes passed to ``run_ocr``; defaults to
            ``["ja"]``. A single string is wrapped into a one-element list.
        draw_bounding_box: optional value echoed back unchanged in the
            response; defaults to ``False``.

    Returns:
        On success, an ``application/json`` response holding the dict built
        by ``parse_ocr_result`` plus the ``draw_bounding_box`` key.
        A ``{"error": ...}`` payload with status 400 when the request is not
        a JSON object, has no ``image`` key, or the image cannot be decoded.
        A ``{"error": ...}`` payload with status 500 for any other failure.
    """
    try:
        print("\n\n=== Żądanie OCR ===")
        print(f"Metoda: {request.method}")
        print(f"Nagłówki: {dict(request.headers)}")

        # silent=True yields None for a malformed body instead of raising, so
        # bad JSON and non-object JSON (list, null, ...) are both reported as
        # a client error rather than falling through to the 500 handler.
        data = request.get_json(silent=True) if request.is_json else None
        if not isinstance(data, dict):
            print("Żądanie nie jest JSON")
            return jsonify({"error": "Żądanie musi być w formacie JSON"}), 400

        # Log which keys were sent (not the values: the image is large).
        print(f"Klucze JSON żądania: {list(data.keys())}")

        # The image is mandatory.
        if "image" not in data:
            print("Brak danych obrazu")
            return jsonify({"error": "Brak danych obrazu"}), 400

        # Decode the base64 payload into a PIL image.
        try:
            image_decode = base64.b64decode(data["image"])
            image = Image.open(BytesIO(image_decode))
            # Image.open is lazy; force the pixel data to be read here so a
            # corrupt or truncated image is rejected as a bad request.
            image.load()
            print("Obraz został pomyślnie odkodowany z Base64")
        except Exception as e:
            print(f"Dekodowanie obrazu nie powiodło się: {e}")
            return jsonify({"error": f"Dekodowanie obrazu nie powiodło się: {str(e)}"}), 400

        # Recognition languages; tolerate a bare string such as "ja".
        langs = data.get("langs", ["ja"])
        if isinstance(langs, str):
            langs = [langs]
        print(f"języki: {langs}")

        # Flag that is only echoed back to the client in the response.
        draw_bounding_box = data.get("draw_bounding_box", False)
        print(f"draw_bounding_box: {draw_bounding_box}")

        # Run OCR on the single image with its language list.
        # https://github.com/VikParuchuri/surya?tab=readme-ov-file#from-python
        result = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
        print(f"OCR zakończony pomyślnie: {result}")
        # Example shape of ``result``:
        # [OCRResult(
        #     text_lines=[
        #         TextLine(polygon=[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]],
        #                  confidence=0.0, text='String', bbox=[0.0, 0.0, 0.0, 0.0]),
        #         ...
        #     ],
        #     languages=['ja'], image_bbox=[0.0, 0.0, 0.0, 0.0]
        # )]

        # Convert the result objects into the response dictionary.
        parsed_result = parse_ocr_result(result)
        parsed_result['draw_bounding_box'] = draw_bounding_box

        # Serialise manually to keep non-ASCII text unescaped, and label the
        # body as JSON explicitly: a bare str return value would be served
        # with Flask's default text/html content type.
        json_result = json.dumps(parsed_result, indent=4, ensure_ascii=False)
        return app.response_class(json_result, mimetype="application/json")
    except Exception as e:
        # Top-level boundary: report any unexpected failure as a 500.
        print(f"Błąd żądania: {e}")
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # Script entry point: announce the bind address, then start Flask's
    # built-in server with the module-level settings.
    # NOTE(review): with debug enabled, Flask's reloader may import this
    # module a second time and so load the models twice; confirm whether
    # that is acceptable.
    print(f"=== Uruchamianie serwera OCR {APP_HOST} na porcie {APP_PORT} ===")
    app.run(host=APP_HOST, port=APP_PORT, debug=APP_DEBUG)