diff --git a/README.md b/README.md index fff366f..0996abc 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ sudo apt-get install -y libgl1-mesa-glx libglib2.0-0 # 安装 Python 依赖 +python -m venv .venv +source .venv/bin/activate pip install -r requirements.txt ``` @@ -36,18 +38,42 @@ pip install -r requirements.txt **命令行批处理** ```bash # 将图片放入 data/input/ 目录 -python src/main.py +.venv/bin/python src/main.py # 结果保存在 data/output/result.xlsx ``` **桌面应用** ```bash -python src/desktop.py +.venv/bin/python src/desktop.py # 启动 PyQt6 窗口,可选择摄像头实时拍照识别 ``` +### 3. OCR 后端切换(RapidOCR / PaddleOCR) + +默认后端为 **RapidOCR(ONNX)**,可通过环境变量切换: + +```bash +# 默认:RapidOCR(推荐,跨平台更稳) +POST_OCR_BACKEND=rapidocr .venv/bin/python src/desktop.py + +# 强制使用 PaddleOCR +POST_OCR_BACKEND=paddle .venv/bin/python src/desktop.py + +# 自动:优先 RapidOCR,失败回退 PaddleOCR +POST_OCR_BACKEND=auto .venv/bin/python src/desktop.py +``` + +常用相关环境变量: +- `POST_OCR_BACKEND_FALLBACK_PADDLE=1|0`:是否允许回退到 Paddle(默认: + - `POST_OCR_BACKEND=auto` 时为 `1` + - 用户显式 `POST_OCR_BACKEND=rapidocr` 时为 `0`) +- `POST_OCR_MP_START_METHOD=spawn|fork`:强制指定 OCR 子进程启动方式(macOS 默认:rapidocr 用 `spawn`,paddle 用 `fork`) +- `POST_OCR_MAIN_SPLIT=1~4`:主 ROI 分片数(默认 2) +- `POST_OCR_MAX_ROI_WIDTH=600+`:识别前缩放宽度上限(默认 960) +- `POST_OCR_JOB_TIMEOUT_SEC`:单次识别超时秒数(默认 25) + --- ## Windows 桌面离线版(zip 目录包) diff --git a/requirements.txt b/requirements.txt index 1cdc0e0..c1e1181 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # 桌面版依赖(本地电脑安装) # ⚠️ PaddleOCR 3.x 有 PIR+oneDNN 兼容性问题,必须使用 2.x +rapidocr-onnxruntime paddleocr==2.10.0 paddlepaddle==2.6.2 diff --git a/src/desktop.py b/src/desktop.py index fc42805..83d6a06 100644 --- a/src/desktop.py +++ b/src/desktop.py @@ -89,15 +89,22 @@ class OCRService(QObject): super().__init__() self._models_base_dir = models_base_dir self._busy = False + self.backend_name = "unknown" self._stop_event = threading.Event() - method_default = "fork" if sys.platform == "darwin" else "spawn" + backend_req = 
os.environ.get("POST_OCR_BACKEND", "rapidocr").strip().lower() or "rapidocr" + if sys.platform == "darwin": + # macOS + PyQt/OpenCV 场景下 fork 对 ONNX 推理稳定性较差,rapidocr 默认走 spawn。 + # Paddle 在 macOS 历史上与 spawn 组合更容易出现卡住,因此保留 fork。 + method_default = "fork" if backend_req == "paddle" else "spawn" + else: + method_default = "spawn" method = os.environ.get("POST_OCR_MP_START_METHOD", method_default).strip() or method_default try: self._ctx = mp.get_context(method) except ValueError: method = method_default self._ctx = mp.get_context(method_default) - logger.info("OCR multiprocessing start_method=%s", method) + logger.info("OCR multiprocessing start_method=%s (backend_req=%s)", method, backend_req) self._req_q = None self._resp_q = None self._proc = None @@ -189,7 +196,12 @@ class OCRService(QObject): logger.info("OCR 子进程进度 job=%s stage=%s%s", job_id, stage, suffix) continue if msg_type == "ready": - logger.info("OCR 子进程已就绪 pid=%s", getattr(self._proc, "pid", None)) + self.backend_name = str(msg.get("backend", "unknown")) + logger.info( + "OCR 子进程已就绪 pid=%s backend=%s", + getattr(self._proc, "pid", None), + self.backend_name, + ) self.ready.emit() continue if msg_type == "init_error": @@ -448,11 +460,16 @@ class MainWindow(QMainWindow): def _on_ocr_ready(self) -> None: try: self._ocr_ready = True - self.statusBar().showMessage("OCR 模型已加载(离线)") + backend = "unknown" + try: + backend = str(getattr(self._ocr_service, "backend_name", "unknown")) + except Exception: + backend = "unknown" + self.statusBar().showMessage(f"OCR 模型已加载({backend})") btn = getattr(self, "btn_capture", None) if btn is not None: btn.setEnabled(self.cap is not None and not self._ocr_busy) - logger.info("OCR ready") + logger.info("OCR ready backend=%s", backend) except Exception as e: logger.exception("处理 OCR ready 回调失败:%s", str(e)) diff --git a/src/main.py b/src/main.py index 339706c..e453070 100644 --- a/src/main.py +++ b/src/main.py @@ -1,8 +1,10 @@ import os import glob +import cv2 import pandas as pd 
from tqdm import tqdm -from paddleocr import PaddleOCR +from pathlib import Path +from ocr_engine import create_ocr_engine from processor import extract_info, save_to_excel # 禁用联网检查,加快启动速度 @@ -10,8 +12,9 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True" def main(): - # 初始化 PaddleOCR - ocr = PaddleOCR(use_textline_orientation=True, lang="ch") + # 初始化 OCR 引擎(默认 rapidocr,可通过环境变量切换) + models_dir = Path("models") + ocr_engine = create_ocr_engine(models_base_dir=models_dir) input_dir = "data/input" output_dir = "data/output" @@ -36,31 +39,31 @@ def main(): for img_path in tqdm(image_paths): try: # 1. 执行 OCR 识别 - result = ocr.ocr(img_path, cls=False) + img = cv2.imread(img_path) + if img is None: + errors.append( + {"file": os.path.basename(img_path), "error": "图片读取失败"} + ) + continue + lines = ocr_engine.infer_lines(img) # 2. 提取文字行 ocr_texts = [] ocr_lines = [] - if result and result[0]: - for line in result[0]: - # line 格式: [box, (text, confidence)] - if line and len(line) >= 2: - text = str(line[1][0]) - ocr_texts.append(text) - conf = None - try: - conf = float(line[1][1]) - except Exception: - conf = None - ocr_lines.append( - { - "text": text, - "box": line[0], - "conf": conf, - "source": "main", - "roi_index": 0, - } - ) + for line in lines: + text = str(line.text).strip() + if not text: + continue + ocr_texts.append(text) + ocr_lines.append( + { + "text": text, + "box": line.box, + "conf": line.conf, + "source": "main", + "roi_index": 0, + } + ) # 3. 
from __future__ import annotations

import logging
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List, Optional

logger = logging.getLogger("post_ocr.ocr_engine")

# Values of POST_OCR_BACKEND_FALLBACK_PADDLE that switch the Paddle fallback off.
_FALSY_ENV_VALUES = frozenset({"0", "false", "off", "no"})

# Optional environment overrides for local ONNX model files, mapped to the
# keyword argument each one is forwarded as when constructing RapidOCR.
_RAPID_MODEL_OVERRIDES = (
    ("POST_OCR_RAPID_DET_MODEL", "det_model_path"),
    ("POST_OCR_RAPID_CLS_MODEL", "cls_model_path"),
    ("POST_OCR_RAPID_REC_MODEL", "rec_model_path"),
    ("POST_OCR_RAPID_KEYS_PATH", "rec_keys_path"),
)


@dataclass
class OCRLine:
    """One recognised text line in a backend-independent form."""

    # Recognised text.
    text: str
    # Location as reported by the backend (may be None when unavailable).
    box: Any
    # Recognition confidence, or None when missing / not convertible to float.
    conf: Optional[float] = None


class BaseOCREngine:
    """Common interface implemented by every OCR backend wrapper."""

    backend_name: str = "unknown"

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run OCR on ``img`` and return the recognised lines."""
        raise NotImplementedError


def _to_float(val: Any) -> Optional[float]:
    """Return ``float(val)``, or None when the conversion fails."""
    try:
        return float(val)
    except Exception:
        # Deliberately best-effort: a malformed score must never abort OCR.
        return None


def _first_not_none(mapping: dict, *keys: str) -> Any:
    """Return the first ``mapping[key]`` that is not None, else None.

    Uses an identity check instead of ``or`` so that array-like values (whose
    truth value may be ambiguous) are handled safely.
    """
    for key in keys:
        value = mapping.get(key)
        if value is not None:
            return value
    return None


def _as_list(value: Any) -> list:
    """Materialise ``value`` as a list, treating None as empty."""
    return [] if value is None else list(value)


def _normalize_box(box: Any) -> Any:
    """Convert array-like boxes (objects exposing ``tolist``) to plain lists.

    Consumers test boxes with ``isinstance(box, (list, tuple))`` before
    adjusting coordinates, so array-like boxes would otherwise be skipped.
    """
    tolist = getattr(box, "tolist", None)
    return tolist() if callable(tolist) else box


class PaddleOCREngine(BaseOCREngine):
    """Backend wrapper around the project's offline PaddleOCR instance."""

    backend_name = "paddle"

    def __init__(self, models_base_dir: Path):
        # Imported here rather than at module level, presumably so this module
        # can be imported without the Paddle stack installed -- TODO confirm.
        from ocr_offline import create_offline_ocr

        self._ocr = create_offline_ocr(models_base_dir=models_base_dir)

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run PaddleOCR on ``img`` and convert its result to ``OCRLine`` items.

        Entries are expected in the form ``[box, (text, confidence)]``; anything
        malformed or with empty text is skipped.
        """
        result = self._ocr.ocr(img, cls=False)
        if not result or not result[0]:
            return []

        lines: List[OCRLine] = []
        for entry in result[0]:
            if not entry or len(entry) < 2:
                continue
            payload = entry[1]
            if not isinstance(payload, (list, tuple)) or not payload:
                continue
            text = str(payload[0])
            if not text:
                continue
            conf = _to_float(payload[1]) if len(payload) >= 2 else None
            lines.append(OCRLine(text=text, box=entry[0], conf=conf))
        return lines


class RapidOCREngine(BaseOCREngine):
    """Backend wrapper around RapidOCR (ONNX runtime)."""

    backend_name = "rapidocr"

    def __init__(self, models_base_dir: Path):
        # Import by the official module name:
        # package rapidocr-onnxruntime -> module rapidocr_onnxruntime.
        from rapidocr_onnxruntime import RapidOCR

        # Optional: users who prepared local ONNX models can override the
        # model paths through environment variables.
        kwargs: dict[str, Any] = {}
        for env_name, kwarg_name in _RAPID_MODEL_OVERRIDES:
            value = os.environ.get(env_name, "").strip()
            if value:
                kwargs[kwarg_name] = value

        self._ocr = RapidOCR(**kwargs)
        self._models_base_dir = models_base_dir

    def _parse_result_item(self, item: Any) -> Optional[OCRLine]:
        """Convert one raw result entry to an ``OCRLine``.

        Supported shapes:
          * dict with ``text``/``txt``, ``box``/``points``, ``score``/``conf``
          * ``[box, text, score]``
          * ``[box, (text, score)]`` (Paddle style)

        Returns None for unsupported shapes or blank text.
        """
        if isinstance(item, dict):
            text = str(item.get("text") or item.get("txt") or "").strip()
            if not text:
                return None
            # Identity-based lookup: ``a or b`` raises on array-like boxes.
            box = _normalize_box(_first_not_none(item, "box", "points"))
            conf = _to_float(_first_not_none(item, "score", "conf"))
            return OCRLine(text=text, box=box, conf=conf)

        if not isinstance(item, (list, tuple)) or len(item) < 2:
            return None

        second = item[1]

        # Common format 1: [box, text, score]
        if isinstance(second, str):
            text = second.strip()
            if not text:
                return None
            conf = _to_float(item[2]) if len(item) >= 3 else None
            return OCRLine(text=text, box=_normalize_box(item[0]), conf=conf)

        # Common format 2 (Paddle style): [box, (text, score)]
        if isinstance(second, (list, tuple)) and len(second) >= 1:
            text = str(second[0]).strip()
            if not text:
                return None
            conf = _to_float(second[1]) if len(second) >= 2 else None
            return OCRLine(text=text, box=_normalize_box(item[0]), conf=conf)

        return None

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run RapidOCR on ``img`` and convert its result to ``OCRLine`` items.

        Handles both the ``(ocr_res, elapse)`` tuple return and result objects
        exposing ``boxes`` / ``txts`` / ``scores`` attributes.
        """
        raw = self._ocr(img)
        # RapidOCR commonly returns (ocr_res, elapse).
        result = raw[0] if isinstance(raw, tuple) and len(raw) >= 1 else raw
        if result is None:
            return []

        # Some versions return an object with boxes/txts/scores attributes.
        if hasattr(result, "boxes") and hasattr(result, "txts"):
            # ``None`` checks instead of ``or []``: these attributes may be
            # arrays whose truth value is ambiguous.
            boxes = _as_list(getattr(result, "boxes", None))
            txts = _as_list(getattr(result, "txts", None))
            scores = _as_list(getattr(result, "scores", None))

            lines: List[OCRLine] = []
            for idx, raw_text in enumerate(txts):
                text = str(raw_text).strip()
                if not text:
                    continue
                box = _normalize_box(boxes[idx]) if idx < len(boxes) else None
                conf = _to_float(scores[idx]) if idx < len(scores) else None
                lines.append(OCRLine(text=text, box=box, conf=conf))
            return lines

        if not isinstance(result, (list, tuple)):
            return []

        parsed_items = (self._parse_result_item(item) for item in result)
        return [line for line in parsed_items if line is not None]


def _resolve_allow_fallback(
    backend: str, explicit: bool, fallback_env: Optional[str]
) -> bool:
    """Decide whether a failed RapidOCR init may fall back to Paddle."""
    if fallback_env is not None and fallback_env.strip():
        return fallback_env.strip().lower() not in _FALSY_ENV_VALUES
    # Defaults when the variable is unset or blank:
    #   1) auto mode allows the fallback;
    #   2) an explicitly requested rapidocr does not fall back silently
    #      (avoids "looks like rapidocr, actually still paddle");
    #   3) everything else keeps the backward-compatible default (allowed).
    return not (backend == "rapidocr" and explicit)


def _build_engine(engine_cls: type, models_base_dir: Path) -> BaseOCREngine:
    """Instantiate ``engine_cls`` and log which backend is in use."""
    engine = engine_cls(models_base_dir=models_base_dir)
    logger.info("create_ocr_engine: using backend=%s", engine.backend_name)
    return engine


def create_ocr_engine(models_base_dir: Path) -> BaseOCREngine:
    """
    Create the OCR engine selected through environment variables.

    Environment variables:
    - POST_OCR_BACKEND: rapidocr | paddle | auto (default: rapidocr;
      ``onnx`` is accepted as an alias of rapidocr; unknown values use paddle)
    - POST_OCR_BACKEND_FALLBACK_PADDLE: 1/0 (when unset, decided by backend);
      only consulted for the rapidocr/onnx backend -- ``auto`` always falls
      back to paddle when rapidocr fails to initialise.

    Raises:
        RuntimeError: rapidocr was requested, failed to initialise, and the
            Paddle fallback is disabled.
    """
    backend_env = os.environ.get("POST_OCR_BACKEND")
    backend = (backend_env or "rapidocr").strip().lower() or "rapidocr"
    # A set-but-blank variable is not an explicit choice by the user.
    explicit = backend_env is not None and backend_env.strip() != ""
    allow_fallback = _resolve_allow_fallback(
        backend, explicit, os.environ.get("POST_OCR_BACKEND_FALLBACK_PADDLE")
    )

    logger.info(
        "create_ocr_engine: request=%s explicit=%s fallback=%s python=%s",
        backend,
        explicit,
        allow_fallback,
        sys.executable,
    )

    if backend in {"rapidocr", "onnx"}:
        try:
            return _build_engine(RapidOCREngine, models_base_dir)
        except Exception as e:
            logger.exception("create_ocr_engine: rapidocr 初始化失败")
            if not allow_fallback:
                raise RuntimeError(
                    "POST_OCR_BACKEND=rapidocr 初始化失败,且未启用回退。"
                    "请先安装 rapidocr-onnxruntime,或设置 POST_OCR_BACKEND_FALLBACK_PADDLE=1。"
                ) from e
            logger.warning("create_ocr_engine: 已回退到 paddle")
            return _build_engine(PaddleOCREngine, models_base_dir)

    if backend == "paddle":
        return _build_engine(PaddleOCREngine, models_base_dir)

    # auto: prefer rapidocr, fall back to paddle on failure.
    if backend == "auto":
        try:
            return _build_engine(RapidOCREngine, models_base_dir)
        except Exception:
            logger.exception("create_ocr_engine: auto 模式 rapidocr 初始化失败,回退 paddle")
            return _build_engine(PaddleOCREngine, models_base_dir)

    # Unknown value: fall back to paddle.
    logger.warning("create_ocr_engine: 未知后端 '%s',回退 paddle", backend)
    return _build_engine(PaddleOCREngine, models_base_dir)
logger.exception("OCR 子进程初始化失败") response_q.put({"type": "init_error", "error": str(e)}) return @@ -58,31 +62,26 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None: if img is None: continue response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_start"}) - result = ocr.ocr(img, cls=False) + lines = engine.infer_lines(img) response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_done"}) - if result and result[0]: - for line in result[0]: - if line and len(line) >= 2: - text = str(line[1][0]) - ocr_texts.append(text) - conf = None - try: - conf = float(line[1][1]) - except Exception: - conf = None - # 将切片内的局部坐标还原为完整 ROI 坐标 - box = line[0] - if y_offset and isinstance(box, (list, tuple)): - box = [[p[0], p[1] + y_offset] for p in box] - ocr_lines.append( - { - "text": text, - "box": box, - "conf": conf, - "source": source, - "roi_index": roi_index, - } - ) + for line in lines: + text = str(line.text).strip() + if not text: + continue + ocr_texts.append(text) + # 将切片内的局部坐标还原为完整 ROI 坐标 + box = line.box + if y_offset and isinstance(box, (list, tuple)): + box = [[p[0], p[1] + y_offset] for p in box] + ocr_lines.append( + { + "text": text, + "box": box, + "conf": line.conf, + "source": source, + "roi_index": roi_index, + } + ) record = extract_info(ocr_lines if ocr_lines else ocr_texts) response_q.put({"type": "progress", "job_id": int(job_id), "stage": "parse_done", "texts": len(ocr_texts)}) @@ -95,4 +94,5 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None: } ) except Exception as e: + logger.exception("OCR 子进程处理任务失败 job=%s", job_id) response_q.put({"type": "error", "job_id": int(job_id), "error": str(e)})