feat: 切换可选RapidOCR后端并修复macOS识别卡住

2026-02-25 09:36:37 +08:00
6 changed files with 322 additions and 58 deletions
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@
 sudo apt-get install -y libgl1-mesa-glx libglib2.0-0

 # 安装 Python 依赖
+python -m venv .venv
+source .venv/bin/activate
 pip install -r requirements.txt
 ```

@@ -36,18 +38,42 @@ pip install -r requirements.txt
 **命令行批处理**
 ```bash
 # 将图片放入 data/input/ 目录
-python src/main.py
+.venv/bin/python src/main.py

 # 结果保存在 data/output/result.xlsx
 ```

 **桌面应用**
 ```bash
-python src/desktop.py
+.venv/bin/python src/desktop.py

 # 启动 PyQt6 窗口，可选择摄像头实时拍照识别
 ```

+### 3. OCR 后端切换（RapidOCR / PaddleOCR）
+
+默认后端为 **RapidOCR(ONNX)**，可通过环境变量切换：
+
+```bash
+# RapidOCR（默认后端，推荐，跨平台更稳；显式指定时默认不回退 Paddle）
+POST_OCR_BACKEND=rapidocr .venv/bin/python src/desktop.py
+
+# 强制使用 PaddleOCR
+POST_OCR_BACKEND=paddle .venv/bin/python src/desktop.py
+
+# 自动：优先 RapidOCR，失败回退 PaddleOCR
+POST_OCR_BACKEND=auto .venv/bin/python src/desktop.py
+```
+
+常用相关环境变量：
+- `POST_OCR_BACKEND_FALLBACK_PADDLE=1|0`：RapidOCR 初始化失败时是否允许回退到 Paddle。未设置时的默认值：
+  - `POST_OCR_BACKEND=auto` 或未设置 `POST_OCR_BACKEND` 时为 `1`
+  - 用户显式 `POST_OCR_BACKEND=rapidocr` 时为 `0`
+- `POST_OCR_MP_START_METHOD=spawn|fork`：强制指定 OCR 子进程启动方式（macOS 默认：rapidocr 用 `spawn`，paddle 用 `fork`）
+- `POST_OCR_MAIN_SPLIT=1~4`：主 ROI 分片数（默认 2）
+- `POST_OCR_MAX_ROI_WIDTH=600+`：识别前缩放宽度上限（默认 960）
+- `POST_OCR_JOB_TIMEOUT_SEC`：单次识别超时秒数（默认 25）
+
 ---

 ## Windows 桌面离线版（zip 目录包）
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 # 桌面版依赖（本地电脑安装）
 # ⚠️ PaddleOCR 3.x 有 PIR+oneDNN 兼容性问题，必须使用 2.x
+rapidocr-onnxruntime
 paddleocr==2.10.0
 paddlepaddle==2.6.2

--- a/src/desktop.py
+++ b/src/desktop.py
@@ -89,15 +89,22 @@ class OCRService(QObject):
        super().__init__()
        self._models_base_dir = models_base_dir
        self._busy = False
+        self.backend_name = "unknown"
        self._stop_event = threading.Event()
-        method_default = "fork" if sys.platform == "darwin" else "spawn"
+        backend_req = os.environ.get("POST_OCR_BACKEND", "rapidocr").strip().lower() or "rapidocr"
+        if sys.platform == "darwin":
+            # macOS + PyQt/OpenCV 场景下 fork 对 ONNX 推理稳定性较差，rapidocr 默认走 spawn。
+            # Paddle 在 macOS 历史上与 spawn 组合更容易出现卡住，因此保留 fork。
+            method_default = "fork" if backend_req == "paddle" else "spawn"
+        else:
+            method_default = "spawn"
        method = os.environ.get("POST_OCR_MP_START_METHOD", method_default).strip() or method_default
        try:
            self._ctx = mp.get_context(method)
        except ValueError:
            method = method_default
            self._ctx = mp.get_context(method_default)
-        logger.info("OCR multiprocessing start_method=%s", method)
+        logger.info("OCR multiprocessing start_method=%s (backend_req=%s)", method, backend_req)
        self._req_q = None
        self._resp_q = None
        self._proc = None
@@ -189,7 +196,12 @@ class OCRService(QObject):
                logger.info("OCR 子进程进度 job=%s stage=%s%s", job_id, stage, suffix)
                continue
            if msg_type == "ready":
-                logger.info("OCR 子进程已就绪 pid=%s", getattr(self._proc, "pid", None))
+                self.backend_name = str(msg.get("backend", "unknown"))
+                logger.info(
+                    "OCR 子进程已就绪 pid=%s backend=%s",
+                    getattr(self._proc, "pid", None),
+                    self.backend_name,
+                )
                self.ready.emit()
                continue
            if msg_type == "init_error":
@@ -448,11 +460,16 @@ class MainWindow(QMainWindow):
    def _on_ocr_ready(self) -> None:
        try:
            self._ocr_ready = True
-            self.statusBar().showMessage("OCR 模型已加载（离线）")
+            backend = "unknown"
+            try:
+                backend = str(getattr(self._ocr_service, "backend_name", "unknown"))
+            except Exception:
+                backend = "unknown"
+            self.statusBar().showMessage(f"OCR 模型已加载（{backend}）")
            btn = getattr(self, "btn_capture", None)
            if btn is not None:
                btn.setEnabled(self.cap is not None and not self._ocr_busy)
-            logger.info("OCR ready")
+            logger.info("OCR ready backend=%s", backend)
        except Exception as e:
            logger.exception("处理 OCR ready 回调失败：%s", str(e))

--- a/src/main.py
+++ b/src/main.py
@@ -1,8 +1,10 @@
 import os
 import glob
+import cv2
 import pandas as pd
 from tqdm import tqdm
-from paddleocr import PaddleOCR
+from pathlib import Path
+from ocr_engine import create_ocr_engine
 from processor import extract_info, save_to_excel

 # 禁用联网检查，加快启动速度
@@ -10,8 +12,9 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"


 def main():
-    # 初始化 PaddleOCR
-    ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
+    # 初始化 OCR 引擎（默认 rapidocr，可通过环境变量切换）
+    models_dir = Path("models")
+    ocr_engine = create_ocr_engine(models_base_dir=models_dir)

    input_dir = "data/input"
    output_dir = "data/output"
@@ -36,31 +39,31 @@ def main():
    for img_path in tqdm(image_paths):
        try:
            # 1. 执行 OCR 识别
-            result = ocr.ocr(img_path, cls=False)
+            img = cv2.imread(img_path)
+            if img is None:
+                errors.append(
+                    {"file": os.path.basename(img_path), "error": "图片读取失败"}
+                )
+                continue
+            lines = ocr_engine.infer_lines(img)

            # 2. 提取文字行
            ocr_texts = []
            ocr_lines = []
-            if result and result[0]:
-                for line in result[0]:
-                    # line 格式: [box, (text, confidence)]
-                    if line and len(line) >= 2:
-                        text = str(line[1][0])
-                        ocr_texts.append(text)
-                        conf = None
-                        try:
-                            conf = float(line[1][1])
-                        except Exception:
-                            conf = None
-                        ocr_lines.append(
-                            {
-                                "text": text,
-                                "box": line[0],
-                                "conf": conf,
-                                "source": "main",
-                                "roi_index": 0,
-                            }
-                        )
+            for line in lines:
+                text = str(line.text).strip()
+                if not text:
+                    continue
+                ocr_texts.append(text)
+                ocr_lines.append(
+                    {
+                        "text": text,
+                        "box": line.box,
+                        "conf": line.conf,
+                        "source": "main",
+                        "roi_index": 0,
+                    }
+                )

            # 3. 结构化解析
            if ocr_texts:
--- a/src/ocr_engine.py
+++ b/src/ocr_engine.py
@@ -0,0 +1,217 @@
+from __future__ import annotations
+
+import logging
+import os
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, List, Optional
+
+logger = logging.getLogger("post_ocr.ocr_engine")
+
+
+@dataclass
+class OCRLine:  # One recognized text line in a backend-independent shape.
+    text: str  # recognized text content
+    box: Any  # location of the line exactly as the backend reported it (not normalized)
+    conf: Optional[float] = None  # recognition confidence; None when unavailable
+
+
+class BaseOCREngine:  # Common interface shared by the OCR backend wrappers in this module.
+    backend_name: str = "unknown"  # overridden by each concrete backend class
+
+    def infer_lines(self, img: Any) -> List[OCRLine]:  # concrete engines must override
+        raise NotImplementedError
+
+
+def _to_float(val: Any) -> Optional[float]:  # Best-effort float conversion.
+    try:
+        return float(val)
+    except Exception:  # any conversion failure (e.g. None or non-numeric text) maps to None
+        return None
+
+
+class PaddleOCREngine(BaseOCREngine):
+    """Engine wrapping the OCR object built by ``ocr_offline.create_offline_ocr``."""
+    backend_name = "paddle"
+
+    def __init__(self, models_base_dir: Path):
+        from ocr_offline import create_offline_ocr  # imported only when this backend is built
+
+        self._ocr = create_offline_ocr(models_base_dir=models_base_dir)
+
+    def infer_lines(self, img: Any) -> List[OCRLine]:
+        """Run OCR on ``img`` and convert ``[box, (text, score)]`` rows into OCRLine items."""
+        raw = self._ocr.ocr(img, cls=False)
+        page = raw[0] if raw else None
+        parsed: List[OCRLine] = []
+        for row in page or []:
+            if not row or len(row) < 2:
+                continue
+            payload = row[1]
+            if not (isinstance(payload, (list, tuple)) and payload and str(payload[0])):
+                continue
+            score = _to_float(payload[1]) if len(payload) >= 2 else None
+            parsed.append(OCRLine(text=str(payload[0]), box=row[0], conf=score))
+        return parsed
+
+
+class RapidOCREngine(BaseOCREngine):
+    """OCR engine backed by ``rapidocr_onnxruntime.RapidOCR``."""
+
+    backend_name = "rapidocr"
+
+    def __init__(self, models_base_dir: Path):
+        """Build the RapidOCR instance, applying optional model-path overrides from the env."""
+        # Import by the official package name: rapidocr-onnxruntime -> rapidocr_onnxruntime.
+        from rapidocr_onnxruntime import RapidOCR
+
+        # Optional: locally prepared ONNX models / key file can be supplied via env variables.
+        overrides = {
+            "det_model_path": "POST_OCR_RAPID_DET_MODEL",
+            "cls_model_path": "POST_OCR_RAPID_CLS_MODEL",
+            "rec_model_path": "POST_OCR_RAPID_REC_MODEL",
+            "rec_keys_path": "POST_OCR_RAPID_KEYS_PATH",
+        }
+        kwargs: dict[str, Any] = {}
+        for arg_name, env_name in overrides.items():
+            value = os.environ.get(env_name, "").strip()
+            if value:
+                kwargs[arg_name] = value
+
+        self._ocr = RapidOCR(**kwargs)
+        self._models_base_dir = models_base_dir  # stored only; not read inside this class
+
+    def _parse_result_item(self, item: Any) -> Optional[OCRLine]:
+        """Convert one raw result row (dict or sequence) to an OCRLine; None if unusable."""
+        if isinstance(item, dict):
+            text = str(item.get("text") or item.get("txt") or "").strip()
+            if not text:
+                return None
+            # Test against None rather than using `or`: an array-like box (e.g. a numpy
+            # array) has no unambiguous truth value and `or` would raise ValueError.
+            box = item["box"] if item.get("box") is not None else item.get("points")
+            conf = _to_float(item.get("score", item.get("conf")))
+            return OCRLine(text=text, box=box, conf=conf)
+
+        if not isinstance(item, (list, tuple)) or len(item) < 2:
+            return None
+        box, payload = item[0], item[1]
+
+        # Common shape 1: [box, text, score]
+        if isinstance(payload, str):
+            text = payload.strip()
+            conf = _to_float(item[2]) if len(item) >= 3 else None
+            return OCRLine(text=text, box=box, conf=conf) if text else None
+
+        # Common shape 2 (Paddle style): [box, (text, score)]
+        if isinstance(payload, (list, tuple)) and len(payload) >= 1:
+            text = str(payload[0]).strip()
+            conf = _to_float(payload[1]) if len(payload) >= 2 else None
+            return OCRLine(text=text, box=box, conf=conf) if text else None
+
+        return None
+
+    def infer_lines(self, img: Any) -> List[OCRLine]:
+        """Run RapidOCR on ``img`` and return the recognized lines with non-empty text."""
+        # RapidOCR commonly returns a tuple: (ocr_res, elapse).
+        raw = self._ocr(img)
+        result = raw[0] if isinstance(raw, tuple) and len(raw) >= 1 else raw
+        if result is None:
+            return []
+
+        lines: List[OCRLine] = []
+
+        # Some versions return an object exposing boxes / txts / scores.
+        if hasattr(result, "boxes") and hasattr(result, "txts"):
+            # `x or []` raises on a multi-element array, so compare with None instead;
+            # `scores` is not covered by the hasattr guard, hence the getattr default.
+            fields = [getattr(result, name, None) for name in ("boxes", "txts", "scores")]
+            boxes, txts, scores = ([] if f is None else list(f) for f in fields)
+            for idx, text in enumerate(txts):
+                t = str(text).strip()
+                if not t:
+                    continue
+                box = boxes[idx] if idx < len(boxes) else None
+                conf = _to_float(scores[idx]) if idx < len(scores) else None
+                lines.append(OCRLine(text=t, box=box, conf=conf))
+            return lines
+
+        # Otherwise expect a sequence of per-line rows; unparseable rows are dropped.
+        if isinstance(result, (list, tuple)):
+            lines.extend(p for p in map(self._parse_result_item, result) if p is not None)
+        return lines
+
+
+def create_ocr_engine(models_base_dir: Path) -> BaseOCREngine:
+    """
+    Create the OCR engine selected through environment variables.
+
+    Environment variables:
+    - POST_OCR_BACKEND: rapidocr | paddle | auto (default rapidocr). "onnx" is handled
+      like rapidocr; any other value falls back to paddle with a warning.
+    - POST_OCR_BACKEND_FALLBACK_PADDLE: 1/0 (when unset, decided by the backend, see below).
+
+    Args:
+        models_base_dir: base directory of local models, passed to the engine constructor.
+
+    Returns:
+        An initialized RapidOCREngine or PaddleOCREngine.
+
+    Raises:
+        RuntimeError: the rapidocr backend failed to initialize and fallback is disabled.
+    """
+    backend_env = os.environ.get("POST_OCR_BACKEND")
+    backend = (backend_env or "rapidocr").strip().lower() or "rapidocr"
+    # A blank value means "not set" (same convention as the fallback variable below), so
+    # it must not count as an explicit rapidocr request that disables the fallback.
+    explicit = backend_env is not None and backend_env.strip() != ""
+    fallback_env = (os.environ.get("POST_OCR_BACKEND_FALLBACK_PADDLE") or "").strip().lower()
+    if fallback_env:
+        allow_fallback = fallback_env not in {"0", "false", "off", "no"}
+    else:
+        # Defaults when the fallback variable is unset or blank:
+        # 1) auto allows fallback;
+        # 2) an explicit rapidocr request never silently ends up running paddle;
+        # 3) everything else stays compatible and allows fallback.
+        allow_fallback = not (backend == "rapidocr" and explicit)
+
+    logger.info(
+        "create_ocr_engine: request=%s explicit=%s fallback=%s python=%s",
+        backend,
+        explicit,
+        allow_fallback,
+        sys.executable,
+    )
+
+    def _build(engine_cls: Any) -> BaseOCREngine:
+        # Instantiate one backend and log which backend ended up in use.
+        engine = engine_cls(models_base_dir=models_base_dir)
+        logger.info("create_ocr_engine: using backend=%s", engine.backend_name)
+        return engine
+
+    if backend in {"rapidocr", "onnx"}:
+        try:
+            return _build(RapidOCREngine)
+        except Exception as e:
+            logger.exception("create_ocr_engine: rapidocr 初始化失败")
+            if allow_fallback:
+                logger.warning("create_ocr_engine: 已回退到 paddle")
+                return _build(PaddleOCREngine)
+            raise RuntimeError(
+                "POST_OCR_BACKEND=rapidocr 初始化失败，且未启用回退。"
+                "请先安装 rapidocr-onnxruntime，或设置 POST_OCR_BACKEND_FALLBACK_PADDLE=1。"
+            ) from e
+
+    if backend == "auto":
+        # auto: prefer rapidocr, fall back to paddle on any initialization failure.
+        # NOTE(review): this branch falls back even when the fallback variable is set to 0.
+        try:
+            return _build(RapidOCREngine)
+        except Exception:
+            logger.exception("create_ocr_engine: auto 模式 rapidocr 初始化失败，回退 paddle")
+            return _build(PaddleOCREngine)
+
+    if backend != "paddle":  # unknown value: last-resort default
+        logger.warning("create_ocr_engine: 未知后端 '%s'，回退 paddle", backend)
+    return _build(PaddleOCREngine)
--- a/src/ocr_worker_process.py
+++ b/src/ocr_worker_process.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 # 必须在所有 paddle/numpy import 之前设置，否则 macOS spawn 子进程推理会死锁
 import os
+import logging
 os.environ["OMP_NUM_THREADS"] = "1"
 os.environ["MKL_NUM_THREADS"] = "1"
 os.environ["OPENBLAS_NUM_THREADS"] = "1"
@@ -13,9 +14,11 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
 from pathlib import Path
 from typing import Any

-from ocr_offline import create_offline_ocr
+from ocr_engine import create_ocr_engine
 from processor import extract_info

+logger = logging.getLogger("post_ocr.ocr_worker")
+

 def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
    """
@@ -25,9 +28,10 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
    """
    try:
        response_q.put({"type": "progress", "stage": "init_start"})
-        ocr = create_offline_ocr(models_base_dir=Path(models_base_dir))
-        response_q.put({"type": "ready"})
+        engine = create_ocr_engine(models_base_dir=Path(models_base_dir))
+        response_q.put({"type": "ready", "backend": getattr(engine, "backend_name", "unknown")})
    except Exception as e:
+        logger.exception("OCR 子进程初始化失败")
        response_q.put({"type": "init_error", "error": str(e)})
        return

@@ -58,31 +62,26 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
                if img is None:
                    continue
                response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_start"})
-                result = ocr.ocr(img, cls=False)
+                lines = engine.infer_lines(img)
                response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_done"})
-                if result and result[0]:
-                    for line in result[0]:
-                        if line and len(line) >= 2:
-                            text = str(line[1][0])
-                            ocr_texts.append(text)
-                            conf = None
-                            try:
-                                conf = float(line[1][1])
-                            except Exception:
-                                conf = None
-                            # 将切片内的局部坐标还原为完整 ROI 坐标
-                            box = line[0]
-                            if y_offset and isinstance(box, (list, tuple)):
-                                box = [[p[0], p[1] + y_offset] for p in box]
-                            ocr_lines.append(
-                                {
-                                    "text": text,
-                                    "box": box,
-                                    "conf": conf,
-                                    "source": source,
-                                    "roi_index": roi_index,
-                                }
-                            )
+                for line in lines:
+                    text = str(line.text).strip()
+                    if not text:
+                        continue
+                    ocr_texts.append(text)
+                    # 将切片内的局部坐标还原为完整 ROI 坐标
+                    box = line.box
+                    if y_offset and isinstance(box, (list, tuple)):
+                        box = [[p[0], p[1] + y_offset] for p in box]
+                    ocr_lines.append(
+                        {
+                            "text": text,
+                            "box": box,
+                            "conf": line.conf,
+                            "source": source,
+                            "roi_index": roi_index,
+                        }
+                    )

            record = extract_info(ocr_lines if ocr_lines else ocr_texts)
            response_q.put({"type": "progress", "job_id": int(job_id), "stage": "parse_done", "texts": len(ocr_texts)})
@@ -95,4 +94,5 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
                }
            )
        except Exception as e:
+            logger.exception("OCR 子进程处理任务失败 job=%s", job_id)
            response_q.put({"type": "error", "job_id": int(job_id), "error": str(e)})