feat: 切换可选RapidOCR后端并修复macOS识别卡住

2026-02-25 09:36:37 +08:00
6 changed files with 322 additions and 58 deletions
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@
 sudo apt-get install -y libgl1-mesa-glx libglib2.0-0
 # 安装 Python 依赖
 python -m venv .venv
 source .venv/bin/activate
 pip install -r requirements.txt
 ```
@@ -36,18 +38,42 @@ pip install -r requirements.txt
 **命令行批处理**
 ```bash
 # 将图片放入 data/input/ 目录
-python src/main.py
+.venv/bin/python src/main.py
 # 结果保存在 data/output/result.xlsx
 ```
 **桌面应用**
 ```bash
-python src/desktop.py
+.venv/bin/python src/desktop.py
 # 启动 PyQt6 窗口，可选择摄像头实时拍照识别
 ```
 ### 3. OCR 后端切换（RapidOCR / PaddleOCR）
 默认后端为 **RapidOCR(ONNX)**，可通过环境变量切换：
 ```bash
 # 默认：RapidOCR（推荐，跨平台更稳）
 POST_OCR_BACKEND=rapidocr .venv/bin/python src/desktop.py
 # 强制使用 PaddleOCR
 POST_OCR_BACKEND=paddle .venv/bin/python src/desktop.py
 # 自动：优先 RapidOCR，失败回退 PaddleOCR
 POST_OCR_BACKEND=auto .venv/bin/python src/desktop.py
 ```
 常用相关环境变量：
 - `POST_OCR_BACKEND_FALLBACK_PADDLE=1|0`：RapidOCR 初始化失败时是否允许回退到 Paddle。未设置该变量时的默认值：
  - `POST_OCR_BACKEND=auto` 时为 `1`
  - 用户显式设置 `POST_OCR_BACKEND=rapidocr` 时为 `0`
  - 未设置 `POST_OCR_BACKEND` 时为 `1`
 - `POST_OCR_MP_START_METHOD=spawn|fork`：强制指定 OCR 子进程启动方式（macOS 默认：rapidocr 用 `spawn`，paddle 用 `fork`）
 - `POST_OCR_MAIN_SPLIT=1~4`：主 ROI 分片数（默认 2）
 - `POST_OCR_MAX_ROI_WIDTH=600+`：识别前缩放宽度上限（默认 960）
 - `POST_OCR_JOB_TIMEOUT_SEC`：单次识别超时秒数（默认 25）
 ---
 ## Windows 桌面离线版（zip 目录包）
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 # 桌面版依赖（本地电脑安装）
 # ⚠️ PaddleOCR 3.x 有 PIR+oneDNN 兼容性问题，必须使用 2.x
 rapidocr-onnxruntime
 paddleocr==2.10.0
 paddlepaddle==2.6.2
--- a/src/desktop.py
+++ b/src/desktop.py
@@ -89,15 +89,22 @@ class OCRService(QObject):
        super().__init__()
        self._models_base_dir = models_base_dir
        self._busy = False
        self.backend_name = "unknown"
        self._stop_event = threading.Event()
-        method_default = "fork" if sys.platform == "darwin" else "spawn"
+        backend_req = os.environ.get("POST_OCR_BACKEND", "rapidocr").strip().lower() or "rapidocr"
        if sys.platform == "darwin":
            # macOS + PyQt/OpenCV 场景下 fork 对 ONNX 推理稳定性较差，rapidocr 默认走 spawn。
            # Paddle 在 macOS 历史上与 spawn 组合更容易出现卡住，因此保留 fork。
            method_default = "fork" if backend_req == "paddle" else "spawn"
        else:
            method_default = "spawn"
        method = os.environ.get("POST_OCR_MP_START_METHOD", method_default).strip() or method_default
        try:
            self._ctx = mp.get_context(method)
        except ValueError:
            method = method_default
            self._ctx = mp.get_context(method_default)
-        logger.info("OCR multiprocessing start_method=%s", method)
+        logger.info("OCR multiprocessing start_method=%s (backend_req=%s)", method, backend_req)
        self._req_q = None
        self._resp_q = None
        self._proc = None
@@ -189,7 +196,12 @@ class OCRService(QObject):
                logger.info("OCR 子进程进度 job=%s stage=%s%s", job_id, stage, suffix)
                continue
            if msg_type == "ready":
-                logger.info("OCR 子进程已就绪 pid=%s", getattr(self._proc, "pid", None))
+                self.backend_name = str(msg.get("backend", "unknown"))
                logger.info(
                    "OCR 子进程已就绪 pid=%s backend=%s",
                    getattr(self._proc, "pid", None),
                    self.backend_name,
                )
                self.ready.emit()
                continue
            if msg_type == "init_error":
@@ -448,11 +460,16 @@ class MainWindow(QMainWindow):
    def _on_ocr_ready(self) -> None:
        try:
            self._ocr_ready = True
-            self.statusBar().showMessage("OCR 模型已加载（离线）")
+            backend = "unknown"
            try:
                backend = str(getattr(self._ocr_service, "backend_name", "unknown"))
            except Exception:
                backend = "unknown"
            self.statusBar().showMessage(f"OCR 模型已加载（{backend}）")
            btn = getattr(self, "btn_capture", None)
            if btn is not None:
                btn.setEnabled(self.cap is not None and not self._ocr_busy)
-            logger.info("OCR ready")
+            logger.info("OCR ready backend=%s", backend)
        except Exception as e:
            logger.exception("处理 OCR ready 回调失败：%s", str(e))
--- a/src/main.py
+++ b/src/main.py
@@ -1,8 +1,10 @@
 import os
 import glob
 import cv2
 import pandas as pd
 from tqdm import tqdm
-from paddleocr import PaddleOCR
+from pathlib import Path
 from ocr_engine import create_ocr_engine
 from processor import extract_info, save_to_excel
 # 禁用联网检查，加快启动速度
@@ -10,8 +12,9 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
 def main():
-    # 初始化 PaddleOCR
+    # 初始化 OCR 引擎（默认 rapidocr，可通过环境变量切换）
-    ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
+    models_dir = Path("models")
    ocr_engine = create_ocr_engine(models_base_dir=models_dir)
    input_dir = "data/input"
    output_dir = "data/output"
@@ -36,31 +39,31 @@ def main():
    for img_path in tqdm(image_paths):
        try:
            # 1. 执行 OCR 识别
-            result = ocr.ocr(img_path, cls=False)
+            img = cv2.imread(img_path)
            if img is None:
                errors.append(
                    {"file": os.path.basename(img_path), "error": "图片读取失败"}
                )
                continue
            lines = ocr_engine.infer_lines(img)
            # 2. 提取文字行
            ocr_texts = []
            ocr_lines = []
-            if result and result[0]:
+            for line in lines:
-                for line in result[0]:
+                text = str(line.text).strip()
-                    # line 格式: [box, (text, confidence)]
+                if not text:
-                    if line and len(line) >= 2:
+                    continue
-                        text = str(line[1][0])
+                ocr_texts.append(text)
-                        ocr_texts.append(text)
+                ocr_lines.append(
-                        conf = None
+                    {
-                        try:
+                        "text": text,
-                            conf = float(line[1][1])
+                        "box": line.box,
-                        except Exception:
+                        "conf": line.conf,
-                            conf = None
+                        "source": "main",
-                        ocr_lines.append(
+                        "roi_index": 0,
-                            {
+                    }
-                                "text": text,
+                )
                                "box": line[0],
                                "conf": conf,
                                "source": "main",
                                "roi_index": 0,
                            }
                        )
            # 3. 结构化解析
            if ocr_texts:
--- a/src/ocr_engine.py
+++ b/src/ocr_engine.py
@@ -0,0 +1,217 @@
 from __future__ import annotations
 import logging
 import os
 import sys
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, List, Optional
 logger = logging.getLogger("post_ocr.ocr_engine")
@dataclass
 class OCRLine:
    """One recognized text line, in the backend-independent shape the engines return."""

    # Recognized text content.
    text: str
    # Location of the line exactly as reported by the backend (type is backend-specific).
    box: Any
    # Recognition confidence; None when the backend supplied no score or the
    # score could not be converted to float.
    conf: Optional[float] = None
 class BaseOCREngine:
    """Common interface shared by every OCR backend wrapper.

    Subclasses override ``backend_name`` with their own identifier and
    implement ``infer_lines``.
    """

    # Identifier of the concrete backend; "unknown" until a subclass overrides it.
    backend_name: str = "unknown"

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run OCR on ``img`` and return the recognized lines.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
 def _to_float(val: Any) -> Optional[float]:
    try:
        return float(val)
    except Exception:
        return None
 class PaddleOCREngine(BaseOCREngine):
    """OCR engine that wraps the offline PaddleOCR instance from ``ocr_offline``."""

    backend_name = "paddle"

    def __init__(self, models_base_dir: Path):
        """Create the underlying OCR object via ``create_offline_ocr``.

        Args:
            models_base_dir: Forwarded unchanged to ``create_offline_ocr``.
        """
        # Imported inside the constructor, so ocr_offline is only loaded when
        # this backend is actually instantiated.
        from ocr_offline import create_offline_ocr

        self._ocr = create_offline_ocr(models_base_dir=models_base_dir)

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run OCR on ``img`` and convert the raw result to ``OCRLine`` objects.

        Only the first element of the raw result is read; each entry in it is
        expected to look like ``[box, (text, score)]``. Entries that are
        malformed or whose text is empty are skipped.
        """
        raw = self._ocr.ocr(img, cls=False)
        if not raw or not raw[0]:
            return []

        parsed: List[OCRLine] = []
        for entry in raw[0]:
            if not entry or len(entry) < 2:
                continue
            payload = entry[1]
            # Without a non-empty (text, score) sequence there is no text to keep.
            if not isinstance(payload, (list, tuple)) or not payload:
                continue
            text = str(payload[0])
            if not text:
                continue
            score = _to_float(payload[1]) if len(payload) >= 2 else None
            parsed.append(OCRLine(text=text, box=entry[0], conf=score))
        return parsed
 class RapidOCREngine(BaseOCREngine):
    """OCR engine backed by RapidOCR (the ``rapidocr_onnxruntime`` package).

    The raw RapidOCR output is normalized into ``OCRLine`` objects. Several
    result layouts are accepted; see ``infer_lines`` and
    ``_parse_result_item``.
    """

    backend_name = "rapidocr"

    def __init__(self, models_base_dir: Path):
        """Create the RapidOCR instance.

        Model paths can optionally be overridden through environment
        variables (unset or blank values are ignored):

        - ``POST_OCR_RAPID_DET_MODEL`` -> ``det_model_path``
        - ``POST_OCR_RAPID_CLS_MODEL`` -> ``cls_model_path``
        - ``POST_OCR_RAPID_REC_MODEL`` -> ``rec_model_path``
        - ``POST_OCR_RAPID_KEYS_PATH`` -> ``rec_keys_path``

        Args:
            models_base_dir: Stored on the instance; this class does not use
                it to locate RapidOCR models.
        """
        # Import by the official package name: rapidocr-onnxruntime -> rapidocr_onnxruntime.
        from rapidocr_onnxruntime import RapidOCR

        kwargs: dict[str, Any] = {}
        # Optional: users who prepared local ONNX models can override the paths.
        overrides = (
            ("POST_OCR_RAPID_DET_MODEL", "det_model_path"),
            ("POST_OCR_RAPID_CLS_MODEL", "cls_model_path"),
            ("POST_OCR_RAPID_REC_MODEL", "rec_model_path"),
            ("POST_OCR_RAPID_KEYS_PATH", "rec_keys_path"),
        )
        for env_name, kwarg_name in overrides:
            value = os.environ.get(env_name, "").strip()
            if value:
                kwargs[kwarg_name] = value

        self._ocr = RapidOCR(**kwargs)
        self._models_base_dir = models_base_dir

    @staticmethod
    def _as_list(values: Any) -> List[Any]:
        """Return ``values`` as a list, mapping ``None`` to an empty list.

        An explicit ``is None`` test is used instead of ``values or []``:
        a numpy array in a boolean context raises ``ValueError``.
        """
        return [] if values is None else list(values)

    def _parse_result_item(self, item: Any) -> Optional[OCRLine]:
        """Convert one raw result entry into an ``OCRLine``.

        Accepted shapes:

        - dict with ``text``/``txt``, ``box``/``points`` and ``score``/``conf``
        - ``[box, text, score]``
        - ``[box, (text, score)]`` (Paddle style)

        Returns:
            The parsed line, or ``None`` when the entry has an unsupported
            shape or its text is empty after stripping.
        """
        if isinstance(item, dict):
            text = str(item.get("text") or item.get("txt") or "").strip()
            if not text:
                return None
            # Do not use `or` here: the box may be a numpy array, whose truth
            # value is ambiguous.
            box = item.get("box")
            if box is None:
                box = item.get("points")
            conf = _to_float(item.get("score", item.get("conf")))
            return OCRLine(text=text, box=box, conf=conf)

        if not isinstance(item, (list, tuple)) or len(item) < 2:
            return None

        box, payload = item[0], item[1]

        # Common format 1: [box, text, score]
        if isinstance(payload, str):
            text = payload.strip()
            if not text:
                return None
            conf = _to_float(item[2]) if len(item) >= 3 else None
            return OCRLine(text=text, box=box, conf=conf)

        # Common format 2 (Paddle style): [box, (text, score)]
        if isinstance(payload, (list, tuple)) and len(payload) >= 1:
            text = str(payload[0]).strip()
            if not text:
                return None
            conf = _to_float(payload[1]) if len(payload) >= 2 else None
            return OCRLine(text=text, box=box, conf=conf)

        return None

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run RapidOCR on ``img`` and return the recognized lines.

        Handles both the ``(ocr_res, elapse)`` tuple return and a result
        object exposing ``boxes`` / ``txts`` / ``scores``. Lines whose text is
        empty after stripping are dropped.
        """
        # RapidOCR commonly returns (ocr_res, elapse).
        raw = self._ocr(img)
        result = raw[0] if isinstance(raw, tuple) and len(raw) >= 1 else raw
        if result is None:
            return []

        lines: List[OCRLine] = []

        # Some versions return an object with boxes/txts/scores attributes.
        # NOTE(review): those attributes may be numpy arrays or None, hence
        # the explicit None handling in _as_list - confirm against the
        # installed RapidOCR version.
        if hasattr(result, "boxes") and hasattr(result, "txts"):
            boxes = self._as_list(getattr(result, "boxes"))
            txts = self._as_list(getattr(result, "txts"))
            scores = self._as_list(getattr(result, "scores", None))
            for idx, text in enumerate(txts):
                t = str(text).strip()
                if not t:
                    continue
                box = boxes[idx] if idx < len(boxes) else None
                conf = _to_float(scores[idx]) if idx < len(scores) else None
                lines.append(OCRLine(text=t, box=box, conf=conf))
            return lines

        if isinstance(result, (list, tuple)):
            for item in result:
                parsed = self._parse_result_item(item)
                if parsed is not None:
                    lines.append(parsed)
        return lines
 def create_ocr_engine(models_base_dir: Path) -> BaseOCREngine:
    """Build the OCR engine selected through environment variables.

    Environment variables:

    - ``POST_OCR_BACKEND``: ``rapidocr`` | ``paddle`` | ``auto``
      (default ``rapidocr``; ``onnx`` is treated like ``rapidocr``).
      Any other value falls back to paddle with a warning.
    - ``POST_OCR_BACKEND_FALLBACK_PADDLE``: ``0``/``false``/``off``/``no``
      disable the fallback, any other non-blank value enables it. When unset
      or blank, the fallback is disabled only if ``POST_OCR_BACKEND`` was set
      and resolves to ``rapidocr``; otherwise it is enabled.

    Args:
        models_base_dir: Passed to the engine constructors.

    Returns:
        The initialized engine.

    Raises:
        RuntimeError: RapidOCR was requested, failed to initialize, and the
            paddle fallback is disabled.
    """
    requested = os.environ.get("POST_OCR_BACKEND")
    explicit = requested is not None
    backend = (requested or "rapidocr").strip().lower() or "rapidocr"

    fallback_raw = (os.environ.get("POST_OCR_BACKEND_FALLBACK_PADDLE") or "").strip()
    if fallback_raw:
        allow_fallback = fallback_raw.lower() not in {"0", "false", "off", "no"}
    else:
        # Default rule: an explicitly requested rapidocr must not silently
        # turn into paddle; every other case (auto, unset, ...) may fall back.
        allow_fallback = not (backend == "rapidocr" and explicit)

    logger.info(
        "create_ocr_engine: request=%s explicit=%s fallback=%s python=%s",
        backend,
        explicit,
        allow_fallback,
        sys.executable,
    )

    def _announce(engine: BaseOCREngine) -> BaseOCREngine:
        # Log which backend was actually selected, then hand the engine back.
        logger.info("create_ocr_engine: using backend=%s", engine.backend_name)
        return engine

    def _build_rapid() -> BaseOCREngine:
        return _announce(RapidOCREngine(models_base_dir=models_base_dir))

    def _build_paddle() -> BaseOCREngine:
        return _announce(PaddleOCREngine(models_base_dir=models_base_dir))

    if backend in ("rapidocr", "onnx"):
        try:
            return _build_rapid()
        except Exception as err:
            logger.exception("create_ocr_engine: rapidocr 初始化失败")
            if not allow_fallback:
                raise RuntimeError(
                    "POST_OCR_BACKEND=rapidocr 初始化失败，且未启用回退。"
                    "请先安装 rapidocr-onnxruntime，或设置 POST_OCR_BACKEND_FALLBACK_PADDLE=1。"
                ) from err
            logger.warning("create_ocr_engine: 已回退到 paddle")
            return _build_paddle()

    if backend == "paddle":
        return _build_paddle()

    # auto: try rapidocr first, fall back to paddle on any failure.
    if backend == "auto":
        try:
            return _build_rapid()
        except Exception:
            logger.exception("create_ocr_engine: auto 模式 rapidocr 初始化失败，回退 paddle")
            return _build_paddle()

    # Catch-all for unrecognized values.
    logger.warning("create_ocr_engine: 未知后端 '%s'，回退 paddle", backend)
    return _build_paddle()
--- a/src/ocr_worker_process.py
+++ b/src/ocr_worker_process.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 # 必须在所有 paddle/numpy import 之前设置，否则 macOS spawn 子进程推理会死锁
 import os
 import logging
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["MKL_NUM_THREADS"] = "1"
 os.environ["OPENBLAS_NUM_THREADS"] = "1"
@@ -13,9 +14,11 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
 from pathlib import Path
 from typing import Any
-from ocr_offline import create_offline_ocr
+from ocr_engine import create_ocr_engine
 from processor import extract_info
 logger = logging.getLogger("post_ocr.ocr_worker")
 def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
    """
@@ -25,9 +28,10 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
    """
    try:
        response_q.put({"type": "progress", "stage": "init_start"})
-        ocr = create_offline_ocr(models_base_dir=Path(models_base_dir))
+        engine = create_ocr_engine(models_base_dir=Path(models_base_dir))
-        response_q.put({"type": "ready"})
+        response_q.put({"type": "ready", "backend": getattr(engine, "backend_name", "unknown")})
    except Exception as e:
        logger.exception("OCR 子进程初始化失败")
        response_q.put({"type": "init_error", "error": str(e)})
        return
@@ -58,31 +62,26 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
                if img is None:
                    continue
                response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_start"})
-                result = ocr.ocr(img, cls=False)
+                lines = engine.infer_lines(img)
                response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_done"})
-                if result and result[0]:
+                for line in lines:
-                    for line in result[0]:
+                    text = str(line.text).strip()
-                        if line and len(line) >= 2:
+                    if not text:
-                            text = str(line[1][0])
+                        continue
-                            ocr_texts.append(text)
+                    ocr_texts.append(text)
-                            conf = None
+                    # 将切片内的局部坐标还原为完整 ROI 坐标
-                            try:
+                    box = line.box
-                                conf = float(line[1][1])
+                    if y_offset and isinstance(box, (list, tuple)):
-                            except Exception:
+                        box = [[p[0], p[1] + y_offset] for p in box]
-                                conf = None
+                    ocr_lines.append(
-                            # 将切片内的局部坐标还原为完整 ROI 坐标
+                        {
-                            box = line[0]
+                            "text": text,
-                            if y_offset and isinstance(box, (list, tuple)):
+                            "box": box,
-                                box = [[p[0], p[1] + y_offset] for p in box]
+                            "conf": line.conf,
-                            ocr_lines.append(
+                            "source": source,
-                                {
+                            "roi_index": roi_index,
-                                    "text": text,
+                        }
-                                    "box": box,
+                    )
                                    "conf": conf,
                                    "source": source,
                                    "roi_index": roi_index,
                                }
                            )
            record = extract_info(ocr_lines if ocr_lines else ocr_texts)
            response_q.put({"type": "progress", "job_id": int(job_id), "stage": "parse_done", "texts": len(ocr_texts)})
@@ -95,4 +94,5 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
                }
            )
        except Exception as e:
            logger.exception("OCR 子进程处理任务失败 job=%s", job_id)
            response_q.put({"type": "error", "job_id": int(job_id), "error": str(e)})