Compare commits

1 Commits

Author SHA1 Message Date
empty
b68612dd53 feat: 切换可选RapidOCR后端并修复macOS识别卡住 2026-02-25 09:36:37 +08:00
6 changed files with 322 additions and 58 deletions

View File

@@ -28,6 +28,8 @@
sudo apt-get install -y libgl1-mesa-glx libglib2.0-0
# 安装 Python 依赖
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
```
@@ -36,18 +38,42 @@ pip install -r requirements.txt
**命令行批处理**
```bash
# 将图片放入 data/input/ 目录
python src/main.py
.venv/bin/python src/main.py
# 结果保存在 data/output/result.xlsx
```
**桌面应用**
```bash
python src/desktop.py
.venv/bin/python src/desktop.py
# 启动 PyQt6 窗口,可选择摄像头实时拍照识别
```
### 3. OCR 后端切换(RapidOCR / PaddleOCR)
默认后端为 **RapidOCR(ONNX)**,可通过环境变量切换:
```bash
# 默认:RapidOCR(推荐,跨平台更稳)
POST_OCR_BACKEND=rapidocr .venv/bin/python src/desktop.py
# 强制使用 PaddleOCR
POST_OCR_BACKEND=paddle .venv/bin/python src/desktop.py
# 自动:优先 RapidOCR,失败回退 PaddleOCR
POST_OCR_BACKEND=auto .venv/bin/python src/desktop.py
```
常用相关环境变量:
- `POST_OCR_BACKEND_FALLBACK_PADDLE=1|0`:是否允许回退到 Paddle,默认:
  - `POST_OCR_BACKEND=auto` 时为 `1`
  - 用户显式 `POST_OCR_BACKEND=rapidocr` 时为 `0`
- `POST_OCR_MP_START_METHOD=spawn|fork`:强制指定 OCR 子进程启动方式(macOS 默认:rapidocr 用 `spawn`,paddle 用 `fork`)
- `POST_OCR_MAIN_SPLIT=1~4`:主 ROI 分片数(默认 2)
- `POST_OCR_MAX_ROI_WIDTH=600+`:识别前缩放宽度上限(默认 960)
- `POST_OCR_JOB_TIMEOUT_SEC`:单次识别超时秒数(默认 25)
---
## Windows 桌面离线版(zip 目录包)

View File

@@ -1,5 +1,6 @@
# 桌面版依赖(本地电脑安装)
# ⚠️ PaddleOCR 3.x 有 PIR+oneDNN 兼容性问题,必须使用 2.x
rapidocr-onnxruntime
paddleocr==2.10.0
paddlepaddle==2.6.2

View File

@@ -89,15 +89,22 @@ class OCRService(QObject):
super().__init__()
self._models_base_dir = models_base_dir
self._busy = False
self.backend_name = "unknown"
self._stop_event = threading.Event()
method_default = "fork" if sys.platform == "darwin" else "spawn"
backend_req = os.environ.get("POST_OCR_BACKEND", "rapidocr").strip().lower() or "rapidocr"
if sys.platform == "darwin":
# macOS + PyQt/OpenCV 场景下 fork 对 ONNX 推理稳定性较差rapidocr 默认走 spawn。
# Paddle 在 macOS 历史上与 spawn 组合更容易出现卡住,因此保留 fork。
method_default = "fork" if backend_req == "paddle" else "spawn"
else:
method_default = "spawn"
method = os.environ.get("POST_OCR_MP_START_METHOD", method_default).strip() or method_default
try:
self._ctx = mp.get_context(method)
except ValueError:
method = method_default
self._ctx = mp.get_context(method_default)
logger.info("OCR multiprocessing start_method=%s", method)
logger.info("OCR multiprocessing start_method=%s (backend_req=%s)", method, backend_req)
self._req_q = None
self._resp_q = None
self._proc = None
@@ -189,7 +196,12 @@ class OCRService(QObject):
logger.info("OCR 子进程进度 job=%s stage=%s%s", job_id, stage, suffix)
continue
if msg_type == "ready":
logger.info("OCR 子进程已就绪 pid=%s", getattr(self._proc, "pid", None))
self.backend_name = str(msg.get("backend", "unknown"))
logger.info(
"OCR 子进程已就绪 pid=%s backend=%s",
getattr(self._proc, "pid", None),
self.backend_name,
)
self.ready.emit()
continue
if msg_type == "init_error":
@@ -448,11 +460,16 @@ class MainWindow(QMainWindow):
def _on_ocr_ready(self) -> None:
try:
self._ocr_ready = True
self.statusBar().showMessage("OCR 模型已加载(离线)")
backend = "unknown"
try:
backend = str(getattr(self._ocr_service, "backend_name", "unknown"))
except Exception:
backend = "unknown"
self.statusBar().showMessage(f"OCR 模型已加载({backend}")
btn = getattr(self, "btn_capture", None)
if btn is not None:
btn.setEnabled(self.cap is not None and not self._ocr_busy)
logger.info("OCR ready")
logger.info("OCR ready backend=%s", backend)
except Exception as e:
logger.exception("处理 OCR ready 回调失败:%s", str(e))

View File

@@ -1,8 +1,10 @@
import os
import glob
import cv2
import pandas as pd
from tqdm import tqdm
from paddleocr import PaddleOCR
from pathlib import Path
from ocr_engine import create_ocr_engine
from processor import extract_info, save_to_excel
# 禁用联网检查,加快启动速度
@@ -10,8 +12,9 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
def main():
# 初始化 PaddleOCR
ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
# 初始化 OCR 引擎(默认 rapidocr可通过环境变量切换
models_dir = Path("models")
ocr_engine = create_ocr_engine(models_base_dir=models_dir)
input_dir = "data/input"
output_dir = "data/output"
@@ -36,27 +39,27 @@ def main():
for img_path in tqdm(image_paths):
try:
# 1. 执行 OCR 识别
result = ocr.ocr(img_path, cls=False)
img = cv2.imread(img_path)
if img is None:
errors.append(
{"file": os.path.basename(img_path), "error": "图片读取失败"}
)
continue
lines = ocr_engine.infer_lines(img)
# 2. 提取文字行
ocr_texts = []
ocr_lines = []
if result and result[0]:
for line in result[0]:
# line 格式: [box, (text, confidence)]
if line and len(line) >= 2:
text = str(line[1][0])
for line in lines:
text = str(line.text).strip()
if not text:
continue
ocr_texts.append(text)
conf = None
try:
conf = float(line[1][1])
except Exception:
conf = None
ocr_lines.append(
{
"text": text,
"box": line[0],
"conf": conf,
"box": line.box,
"conf": line.conf,
"source": "main",
"roi_index": 0,
}

217
src/ocr_engine.py Normal file
View File

@@ -0,0 +1,217 @@
from __future__ import annotations
import logging
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List, Optional
# Module-level logger used by the engine classes and the factory below.
logger = logging.getLogger("post_ocr.ocr_engine")
@dataclass
class OCRLine:
    """One recognized text line in a backend-independent shape."""

    # Recognized text content of the line.
    text: str
    # Bounding box exactly as the backend produced it; the engines pass it
    # through without converting its type or coordinate layout.
    box: Any
    # Recognition confidence, or None when the backend supplied no value
    # that could be converted to float.
    conf: Optional[float] = None
class BaseOCREngine:
    """Common interface implemented by every OCR backend wrapper.

    Subclasses override ``backend_name`` and implement ``infer_lines``.
    """

    # Identifier of the concrete backend; overridden by subclasses.
    backend_name: str = "unknown"

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run OCR on ``img`` and return the recognized lines.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError
def _to_float(val: Any) -> Optional[float]:
try:
return float(val)
except Exception:
return None
class PaddleOCREngine(BaseOCREngine):
    """OCR backend that delegates to the project's offline PaddleOCR setup."""

    backend_name = "paddle"

    def __init__(self, models_base_dir: Path):
        """Create the underlying PaddleOCR instance.

        Args:
            models_base_dir: Directory handed to ``create_offline_ocr`` as
                the base location of the model files.
        """
        # Imported lazily so that merely importing this module does not pull
        # in the Paddle stack.
        from ocr_offline import create_offline_ocr

        self._ocr = create_offline_ocr(models_base_dir=models_base_dir)

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run PaddleOCR on ``img`` and normalize the result.

        Entries are expected in the form ``[box, (text, confidence)]``;
        anything that does not fit, or whose text is empty, is skipped.

        Returns:
            The recognized lines, possibly empty.
        """
        raw = self._ocr.ocr(img, cls=False)
        if not (raw and raw[0]):
            return []

        collected: List[OCRLine] = []
        for entry in raw[0]:
            if not entry or len(entry) < 2:
                continue
            payload = entry[1]
            # The second element must be a non-empty (text, conf) sequence.
            if not isinstance(payload, (list, tuple)) or not payload:
                continue
            text = str(payload[0])
            if not text:
                continue
            confidence = _to_float(payload[1]) if len(payload) >= 2 else None
            collected.append(OCRLine(text=text, box=entry[0], conf=confidence))
        return collected
class RapidOCREngine(BaseOCREngine):
    """OCR backend built on RapidOCR (``rapidocr_onnxruntime``).

    Several result layouts are accepted so that different RapidOCR versions
    can be used: a ``(result, elapse)`` tuple, a plain list of items, or an
    object exposing ``boxes`` / ``txts`` / ``scores`` attributes.
    """

    backend_name = "rapidocr"

    # Environment variable -> RapidOCR constructor keyword. Each variable is
    # optional and lets the user point at a local ONNX model / key file.
    _MODEL_ENV_TO_KWARG = (
        ("POST_OCR_RAPID_DET_MODEL", "det_model_path"),
        ("POST_OCR_RAPID_CLS_MODEL", "cls_model_path"),
        ("POST_OCR_RAPID_REC_MODEL", "rec_model_path"),
        ("POST_OCR_RAPID_KEYS_PATH", "rec_keys_path"),
    )

    def __init__(self, models_base_dir: Path):
        """Create the RapidOCR instance.

        Args:
            models_base_dir: Stored on the instance; it is not passed to
                RapidOCR here. Model paths come from the environment
                variables listed in ``_MODEL_ENV_TO_KWARG`` when set.

        Raises:
            ImportError: if ``rapidocr_onnxruntime`` is not installed.
        """
        # Import under the official module name:
        # package rapidocr-onnxruntime -> module rapidocr_onnxruntime.
        from rapidocr_onnxruntime import RapidOCR

        kwargs: dict[str, Any] = {}
        for env_name, kwarg_name in self._MODEL_ENV_TO_KWARG:
            value = os.environ.get(env_name, "").strip()
            if value:
                kwargs[kwarg_name] = value

        self._ocr = RapidOCR(**kwargs)
        self._models_base_dir = models_base_dir

    @staticmethod
    def _as_list(value: Any) -> list:
        """Return ``value`` as a list, mapping ``None`` to an empty list.

        An explicit ``None`` check is used instead of ``value or []`` because
        truth-testing a multi-element NumPy array raises ``ValueError``.
        """
        return [] if value is None else list(value)

    def _parse_result_item(self, item: Any) -> Optional[OCRLine]:
        """Convert one raw result item into an ``OCRLine``.

        Supported shapes:
            * dict with ``text``/``txt``, ``box``/``points``, ``score``/``conf``
            * ``[box, text, score]``
            * Paddle style ``[box, (text, score)]``

        Returns:
            The parsed line, or ``None`` when the shape is not recognized or
            the text is empty after stripping.
        """
        if isinstance(item, dict):
            text = str(item.get("text") or item.get("txt") or "").strip()
            if not text:
                return None
            # Explicit None check: the box may be an array-like object whose
            # truth value is ambiguous.
            box = item.get("box")
            if box is None:
                box = item.get("points")
            conf = _to_float(item.get("score", item.get("conf")))
            return OCRLine(text=text, box=box, conf=conf)

        if not isinstance(item, (list, tuple)) or len(item) < 2:
            return None

        second = item[1]

        # Common format 1: [box, text, score]
        if isinstance(second, str):
            text = second.strip()
            if not text:
                return None
            conf = _to_float(item[2]) if len(item) >= 3 else None
            return OCRLine(text=text, box=item[0], conf=conf)

        # Common format 2 (Paddle style): [box, (text, score)]
        if isinstance(second, (list, tuple)) and len(second) >= 1:
            text = str(second[0]).strip()
            if not text:
                return None
            conf = _to_float(second[1]) if len(second) >= 2 else None
            return OCRLine(text=text, box=item[0], conf=conf)

        return None

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run RapidOCR on ``img`` and normalize the result.

        Returns:
            The recognized lines with non-empty text, possibly empty.
        """
        # RapidOCR commonly returns (ocr_res, elapse).
        raw = self._ocr(img)
        result = raw[0] if isinstance(raw, tuple) and len(raw) >= 1 else raw
        if result is None:
            return []

        lines: List[OCRLine] = []

        # Some versions return an object carrying boxes / txts / scores.
        if hasattr(result, "boxes") and hasattr(result, "txts"):
            boxes = self._as_list(getattr(result, "boxes", None))
            txts = self._as_list(getattr(result, "txts", None))
            scores = self._as_list(getattr(result, "scores", None))
            for idx, raw_text in enumerate(txts):
                text = str(raw_text).strip()
                if not text:
                    continue
                # boxes / scores may be shorter than txts; pad with None.
                box = boxes[idx] if idx < len(boxes) else None
                conf = _to_float(scores[idx]) if idx < len(scores) else None
                lines.append(OCRLine(text=text, box=box, conf=conf))
            return lines

        if isinstance(result, (list, tuple)):
            for item in result:
                parsed = self._parse_result_item(item)
                if parsed is not None:
                    lines.append(parsed)
        return lines
def create_ocr_engine(models_base_dir: Path) -> BaseOCREngine:
    """Build the OCR engine selected through environment variables.

    Environment variables:
        POST_OCR_BACKEND: ``rapidocr`` | ``paddle`` | ``auto``
            (default ``rapidocr``; ``onnx`` is accepted as an alias).
        POST_OCR_BACKEND_FALLBACK_PADDLE: ``1``/``0``. When unset or blank
            the default depends on the requested backend (see below).

    Args:
        models_base_dir: Forwarded to the engine constructors.

    Returns:
        The initialized engine.

    Raises:
        RuntimeError: the rapidocr backend failed to initialize and falling
            back to paddle is not allowed.
    """
    raw_backend = os.environ.get("POST_OCR_BACKEND")
    explicit = raw_backend is not None
    backend = (raw_backend or "rapidocr").strip().lower() or "rapidocr"

    raw_fallback = os.environ.get("POST_OCR_BACKEND_FALLBACK_PADDLE")
    if raw_fallback is not None and raw_fallback.strip() != "":
        allow_fallback = raw_fallback.strip().lower() not in {"0", "false", "off", "no"}
    else:
        # Default rule: fallback is allowed everywhere except when the user
        # explicitly asked for rapidocr, so that a request for rapidocr never
        # silently ends up running paddle.
        allow_fallback = not (backend == "rapidocr" and explicit)

    logger.info(
        "create_ocr_engine: request=%s explicit=%s fallback=%s python=%s",
        backend,
        explicit,
        allow_fallback,
        sys.executable,
    )

    def _announce(engine: BaseOCREngine) -> BaseOCREngine:
        # Log which backend ended up being used and hand the engine back.
        logger.info("create_ocr_engine: using backend=%s", engine.backend_name)
        return engine

    if backend == "paddle":
        return _announce(PaddleOCREngine(models_base_dir=models_base_dir))

    if backend in {"rapidocr", "onnx"}:
        try:
            return _announce(RapidOCREngine(models_base_dir=models_base_dir))
        except Exception as e:
            logger.exception("create_ocr_engine: rapidocr 初始化失败")
            if not allow_fallback:
                raise RuntimeError(
                    "POST_OCR_BACKEND=rapidocr 初始化失败,且未启用回退。"
                    "请先安装 rapidocr-onnxruntime或设置 POST_OCR_BACKEND_FALLBACK_PADDLE=1。"
                ) from e
            logger.warning("create_ocr_engine: 已回退到 paddle")
            return _announce(PaddleOCREngine(models_base_dir=models_base_dir))

    if backend != "auto":
        # Unknown value: last-resort default is paddle.
        logger.warning("create_ocr_engine: 未知后端 '%s',回退 paddle", backend)
        return _announce(PaddleOCREngine(models_base_dir=models_base_dir))

    # auto: prefer rapidocr, fall back to paddle when it cannot be initialized.
    try:
        return _announce(RapidOCREngine(models_base_dir=models_base_dir))
    except Exception:
        logger.exception("create_ocr_engine: auto 模式 rapidocr 初始化失败,回退 paddle")
        return _announce(PaddleOCREngine(models_base_dir=models_base_dir))

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
# 必须在所有 paddle/numpy import 之前设置,否则 macOS spawn 子进程推理会死锁
import os
import logging
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
@@ -13,9 +14,11 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
from pathlib import Path
from typing import Any
from ocr_offline import create_offline_ocr
from ocr_engine import create_ocr_engine
from processor import extract_info
logger = logging.getLogger("post_ocr.ocr_worker")
def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
"""
@@ -25,9 +28,10 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
"""
try:
response_q.put({"type": "progress", "stage": "init_start"})
ocr = create_offline_ocr(models_base_dir=Path(models_base_dir))
response_q.put({"type": "ready"})
engine = create_ocr_engine(models_base_dir=Path(models_base_dir))
response_q.put({"type": "ready", "backend": getattr(engine, "backend_name", "unknown")})
except Exception as e:
logger.exception("OCR 子进程初始化失败")
response_q.put({"type": "init_error", "error": str(e)})
return
@@ -58,27 +62,22 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
if img is None:
continue
response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_start"})
result = ocr.ocr(img, cls=False)
lines = engine.infer_lines(img)
response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_done"})
if result and result[0]:
for line in result[0]:
if line and len(line) >= 2:
text = str(line[1][0])
for line in lines:
text = str(line.text).strip()
if not text:
continue
ocr_texts.append(text)
conf = None
try:
conf = float(line[1][1])
except Exception:
conf = None
# 将切片内的局部坐标还原为完整 ROI 坐标
box = line[0]
box = line.box
if y_offset and isinstance(box, (list, tuple)):
box = [[p[0], p[1] + y_offset] for p in box]
ocr_lines.append(
{
"text": text,
"box": box,
"conf": conf,
"conf": line.conf,
"source": source,
"roi_index": roi_index,
}
@@ -95,4 +94,5 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
}
)
except Exception as e:
logger.exception("OCR 子进程处理任务失败 job=%s", job_id)
response_q.put({"type": "error", "job_id": int(job_id), "error": str(e)})