# SuryaOCR

# vntocr_suryaocr.py
# SuryaOCR 엔진과 VNTranslator OCR 통합
# 버전: 1.0
# 작성자: Fazx - GarudaMods | https://www.patreon.com/vntranslator

"""
# ==================================================================
# Surya OCR: https://github.com/VikParuchuri/surya
# 필요 사항: python 3.10+ 및 PyTorch
# 설치 방법: pip install surya-ocr
# ==================================================================
# 이 스크립트 실행: python vntocr_suryaocr.py
# VNTranslator에서 Custom Engine - HTTP POST로 다음 구성 사용:
# -- URL: http://127.0.0.1:5353
# -- 콘텐츠 유형: application/json
# -- 헤더: {}
# -- 본문: {"image":"$IMAGE_BASE64", "langs": ["ja"]}
# -- 응답 유형: JSON
# -- 응답 쿼리: fullText
# ==================================================================
# 언어(두 글자 ISO) https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes
# -- 일본어 = ja
# -- 영어 = en
# ==================================================================
"""

from flask import Flask, request, jsonify
from PIL import Image
from io import BytesIO
import base64
import re
import json
from surya.ocr import run_ocr
from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.recognition.processor import load_processor as load_rec_processor

# Host name handed to app.run(host=...) when the script is started directly.
APP_HOST = "localhost"
# TCP port handed to app.run(port=...); matches the URL shown in the header notes.
APP_PORT = 5353
# Handed to app.run(debug=...).
# NOTE(review): Flask debug mode is meant for development use — confirm it
# should stay enabled for end-user installs.
APP_DEBUG = True

def format_ocr_result(ocr_result):
    """Flatten parsed OCR pages into ordered boxes plus one joined string.

    Args:
        ocr_result: iterable of dicts, each holding a "text_boxes" list whose
            items provide "text" and a "bbox" read at indices 0..3.

    Returns:
        A dict with:
            "fullText": every box text joined by single spaces, stripped.
            "boxes": one dict per box with keys text/w/h/x/y, ordered by
                y first and x second.
    """

    def to_entry(item):
        # bbox indices 0/1 become x/y; width and height are the differences
        # against indices 2/3.
        coords = item["bbox"]
        left = coords[0]
        top = coords[1]
        width = coords[2] - left
        height = coords[3] - top
        return {
            "text": item["text"],
            "w": width,
            "h": height,
            "x": left,
            "y": top,
        }

    entries = [
        to_entry(item)
        for page in ocr_result
        for item in page["text_boxes"]
    ]

    # Stable sort: boxes sharing the same (y, x) keep their input order.
    ordered = sorted(entries, key=lambda entry: (entry["y"], entry["x"]))
    joined = " ".join(entry["text"] for entry in ordered)

    return {"fullText": joined.strip(), "boxes": ordered}

def parse_ocr_result(ocr_result):
    """Convert OCR result objects into plain dicts and format them.

    Args:
        ocr_result: list of objects exposing ``text_lines``, ``languages``
            and ``image_bbox``; each text line exposes ``polygon``,
            ``confidence``, ``text`` and ``bbox``.

    Returns:
        Whatever format_ocr_result returns for the converted pages.

    Raises:
        ValueError: if ``ocr_result`` is not a list.
    """
    if not isinstance(ocr_result, list):
        raise ValueError("ocr_result is not a list")

    # Copy the attributes of every page/line into dicts so the formatter
    # can work with plain key lookups.
    pages = [
        {
            "text_boxes": [
                {
                    "polygon": entry.polygon,
                    "confidence": entry.confidence,
                    "text": entry.text,
                    "bbox": entry.bbox,
                }
                for entry in page.text_lines
            ],
            "languages": page.languages,
            "image_bbox": page.image_bbox,
        }
        for page in ocr_result
    ]

    return format_ocr_result(pages)

############################################################

# Flask application object; the OCR route is registered on it.
app = Flask(__name__)
# Surya detection/recognition processors and models, loaded once at module
# import and passed to run_ocr by the request handler.
det_processor = load_det_processor()
det_model = load_det_model()
rec_model = load_rec_model()
rec_processor = load_rec_processor()

@app.route("/", methods=["POST"])
def ocr_endpoint():
    """Run OCR on a base64-encoded image posted as JSON.

    Request body (JSON object):
        image: base64-encoded image data (required).
        langs: language code, or list of codes, handed to run_ocr;
            defaults to ["ja"].
        draw_bounding_box: value echoed back unchanged in the response;
            defaults to False.

    Returns:
        On success, an application/json response holding the dict built by
        parse_ocr_result plus "draw_bounding_box". Malformed requests get
        ({"error": ...}, 400); any other failure gets ({"error": ...}, 500).
    """
    try:
        print("\n\n=== OCR 요청 ===")
        print(f"메서드: {request.method}")
        print(f"헤더: {dict(request.headers)}")

        if not request.is_json:
            print("요청이 JSON이 아님")
            return jsonify({"error": "요청은 JSON이어야 합니다"}), 400

        # silent=True yields None for an unparsable body instead of raising,
        # so a client mistake is reported as 400 rather than falling through
        # to the generic 500 handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            print("요청 본문이 JSON 객체가 아님")
            return jsonify({"error": "요청 본문은 JSON 객체여야 합니다"}), 400

        # Log the payload keys (not the values: the image can be large).
        print(f"요청 JSON 키: {list(data.keys())}")

        # The image field is mandatory.
        if "image" not in data:
            print("이미지 데이터 없음")
            return jsonify({"error": "이미지 데이터가 없습니다"}), 400

        # Decode the base64 payload into a PIL image.
        try:
            image_decode = base64.b64decode(data["image"])
            image = Image.open(BytesIO(image_decode))
            print("이미지가 Base64에서 성공적으로 디코딩되었습니다")
        except Exception as e:
            print(f"이미지 디코딩 실패: {e}")
            return jsonify({"error": f"이미지 디코딩 실패: {str(e)}"}), 400

        # Languages for recognition; accept a bare string such as "ja" as a
        # one-element list so it is not passed on as a string.
        langs = data.get("langs", ["ja"])
        if isinstance(langs, str):
            langs = [langs]
        print(f"langs: {langs}")

        # Flag that is only echoed back to the caller.
        draw_bounding_box = data.get("draw_bounding_box", False)
        print(f"draw_bounding_box: {draw_bounding_box}")

        # Run OCR.
        # https://github.com/VikParuchuri/surya?tab=readme-ov-file#from-python
        result = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
        print(f"OCR 완료: {result}")

        # Sample of the value returned by run_ocr, kept from the original
        # author's notes:
        # [OCRResult(
        #     text_lines=[
        #         TextLine(polygon=[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]], confidence=0.0, text='String', bbox=[0.0, 0.0, 0.0, 0.0]),
        #         TextLine(polygon=[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]], confidence=0.0, text='String', bbox=[0.0, 0.0, 0.0, 0.0]),
        #         TextLine(polygon=[[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]], confidence=0.0, text='String', bbox=[0.0, 0.0, 0.0, 0.0])
        #     ],
        #     languages=['ja'], image_bbox=[0.0, 0.0, 0.0, 0.0]
        # )]

        # Parse the result and serialize it.
        parsed_result = parse_ocr_result(result)
        parsed_result['draw_bounding_box'] = draw_bounding_box
        # ensure_ascii=False keeps non-ASCII text (e.g. Japanese) readable
        # in the body instead of \uXXXX escapes.
        json_result = json.dumps(parsed_result, indent=4, ensure_ascii=False)
        # Returning the bare string would be served as text/html; wrap it so
        # the Content-Type matches the JSON body.
        return app.response_class(json_result, mimetype="application/json")

    except Exception as e:
        # Top-level boundary: report any unexpected failure to the client.
        print(f"요청 오류: {e}")
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    print(f"=== OCR 서버 시작 {APP_HOST} 포트 {APP_PORT} ===")
    # With debug enabled Flask also starts its reloader, which re-executes
    # this script in a child process; the module-level model loading would
    # then happen twice. Disable the reloader so the models load only once.
    app.run(debug=APP_DEBUG, host=APP_HOST, port=APP_PORT, use_reloader=False)
# Previous: EasyOCR | Next: API Gateway