Compare commits

1 Commits

Author SHA1 Message Date
empty
b68612dd53 feat: 切换可选RapidOCR后端并修复macOS识别卡住 2026-02-25 09:36:37 +08:00
6 changed files with 322 additions and 58 deletions

View File

@@ -28,6 +28,8 @@
sudo apt-get install -y libgl1-mesa-glx libglib2.0-0 sudo apt-get install -y libgl1-mesa-glx libglib2.0-0
# 安装 Python 依赖 # 安装 Python 依赖
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt pip install -r requirements.txt
``` ```
@@ -36,18 +38,42 @@ pip install -r requirements.txt
**命令行批处理** **命令行批处理**
```bash ```bash
# 将图片放入 data/input/ 目录 # 将图片放入 data/input/ 目录
python src/main.py .venv/bin/python src/main.py
# 结果保存在 data/output/result.xlsx # 结果保存在 data/output/result.xlsx
``` ```
**桌面应用** **桌面应用**
```bash ```bash
python src/desktop.py .venv/bin/python src/desktop.py
# 启动 PyQt6 窗口,可选择摄像头实时拍照识别 # 启动 PyQt6 窗口,可选择摄像头实时拍照识别
``` ```
### 3. OCR 后端切换(RapidOCR / PaddleOCR)
默认后端为 **RapidOCR(ONNX)**,可通过环境变量切换:
```bash
# 默认:RapidOCR(推荐,跨平台更稳)
POST_OCR_BACKEND=rapidocr .venv/bin/python src/desktop.py
# 强制使用 PaddleOCR
POST_OCR_BACKEND=paddle .venv/bin/python src/desktop.py
# 自动:优先 RapidOCR,失败回退 PaddleOCR
POST_OCR_BACKEND=auto .venv/bin/python src/desktop.py
```
常用相关环境变量:
- `POST_OCR_BACKEND_FALLBACK_PADDLE=1|0`:是否允许回退到 Paddle(默认值如下)
  - `POST_OCR_BACKEND=auto` 时为 `1`
  - 用户显式 `POST_OCR_BACKEND=rapidocr` 时为 `0`
- `POST_OCR_MP_START_METHOD=spawn|fork`:强制指定 OCR 子进程启动方式(macOS 默认:rapidocr 用 `spawn`,paddle 用 `fork`)
- `POST_OCR_MAIN_SPLIT=1~4`:主 ROI 分片数(默认 2)
- `POST_OCR_MAX_ROI_WIDTH=600+`:识别前缩放宽度上限(默认 960)
- `POST_OCR_JOB_TIMEOUT_SEC`:单次识别超时秒数(默认 25)
--- ---
## Windows 桌面离线版zip 目录包) ## Windows 桌面离线版zip 目录包)

View File

@@ -1,5 +1,6 @@
# 桌面版依赖(本地电脑安装) # 桌面版依赖(本地电脑安装)
# ⚠️ PaddleOCR 3.x 有 PIR+oneDNN 兼容性问题,必须使用 2.x # ⚠️ PaddleOCR 3.x 有 PIR+oneDNN 兼容性问题,必须使用 2.x
rapidocr-onnxruntime
paddleocr==2.10.0 paddleocr==2.10.0
paddlepaddle==2.6.2 paddlepaddle==2.6.2

View File

@@ -89,15 +89,22 @@ class OCRService(QObject):
super().__init__() super().__init__()
self._models_base_dir = models_base_dir self._models_base_dir = models_base_dir
self._busy = False self._busy = False
self.backend_name = "unknown"
self._stop_event = threading.Event() self._stop_event = threading.Event()
method_default = "fork" if sys.platform == "darwin" else "spawn" backend_req = os.environ.get("POST_OCR_BACKEND", "rapidocr").strip().lower() or "rapidocr"
if sys.platform == "darwin":
# macOS + PyQt/OpenCV 场景下 fork 对 ONNX 推理稳定性较差rapidocr 默认走 spawn。
# Paddle 在 macOS 历史上与 spawn 组合更容易出现卡住,因此保留 fork。
method_default = "fork" if backend_req == "paddle" else "spawn"
else:
method_default = "spawn"
method = os.environ.get("POST_OCR_MP_START_METHOD", method_default).strip() or method_default method = os.environ.get("POST_OCR_MP_START_METHOD", method_default).strip() or method_default
try: try:
self._ctx = mp.get_context(method) self._ctx = mp.get_context(method)
except ValueError: except ValueError:
method = method_default method = method_default
self._ctx = mp.get_context(method_default) self._ctx = mp.get_context(method_default)
logger.info("OCR multiprocessing start_method=%s", method) logger.info("OCR multiprocessing start_method=%s (backend_req=%s)", method, backend_req)
self._req_q = None self._req_q = None
self._resp_q = None self._resp_q = None
self._proc = None self._proc = None
@@ -189,7 +196,12 @@ class OCRService(QObject):
logger.info("OCR 子进程进度 job=%s stage=%s%s", job_id, stage, suffix) logger.info("OCR 子进程进度 job=%s stage=%s%s", job_id, stage, suffix)
continue continue
if msg_type == "ready": if msg_type == "ready":
logger.info("OCR 子进程已就绪 pid=%s", getattr(self._proc, "pid", None)) self.backend_name = str(msg.get("backend", "unknown"))
logger.info(
"OCR 子进程已就绪 pid=%s backend=%s",
getattr(self._proc, "pid", None),
self.backend_name,
)
self.ready.emit() self.ready.emit()
continue continue
if msg_type == "init_error": if msg_type == "init_error":
@@ -448,11 +460,16 @@ class MainWindow(QMainWindow):
def _on_ocr_ready(self) -> None: def _on_ocr_ready(self) -> None:
try: try:
self._ocr_ready = True self._ocr_ready = True
self.statusBar().showMessage("OCR 模型已加载(离线)") backend = "unknown"
try:
backend = str(getattr(self._ocr_service, "backend_name", "unknown"))
except Exception:
backend = "unknown"
self.statusBar().showMessage(f"OCR 模型已加载({backend}")
btn = getattr(self, "btn_capture", None) btn = getattr(self, "btn_capture", None)
if btn is not None: if btn is not None:
btn.setEnabled(self.cap is not None and not self._ocr_busy) btn.setEnabled(self.cap is not None and not self._ocr_busy)
logger.info("OCR ready") logger.info("OCR ready backend=%s", backend)
except Exception as e: except Exception as e:
logger.exception("处理 OCR ready 回调失败:%s", str(e)) logger.exception("处理 OCR ready 回调失败:%s", str(e))

View File

@@ -1,8 +1,10 @@
import os import os
import glob import glob
import cv2
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
from paddleocr import PaddleOCR from pathlib import Path
from ocr_engine import create_ocr_engine
from processor import extract_info, save_to_excel from processor import extract_info, save_to_excel
# 禁用联网检查,加快启动速度 # 禁用联网检查,加快启动速度
@@ -10,8 +12,9 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
def main(): def main():
# 初始化 PaddleOCR # 初始化 OCR 引擎(默认 rapidocr可通过环境变量切换
ocr = PaddleOCR(use_textline_orientation=True, lang="ch") models_dir = Path("models")
ocr_engine = create_ocr_engine(models_base_dir=models_dir)
input_dir = "data/input" input_dir = "data/input"
output_dir = "data/output" output_dir = "data/output"
@@ -36,31 +39,31 @@ def main():
for img_path in tqdm(image_paths): for img_path in tqdm(image_paths):
try: try:
# 1. 执行 OCR 识别 # 1. 执行 OCR 识别
result = ocr.ocr(img_path, cls=False) img = cv2.imread(img_path)
if img is None:
errors.append(
{"file": os.path.basename(img_path), "error": "图片读取失败"}
)
continue
lines = ocr_engine.infer_lines(img)
# 2. 提取文字行 # 2. 提取文字行
ocr_texts = [] ocr_texts = []
ocr_lines = [] ocr_lines = []
if result and result[0]: for line in lines:
for line in result[0]: text = str(line.text).strip()
# line 格式: [box, (text, confidence)] if not text:
if line and len(line) >= 2: continue
text = str(line[1][0]) ocr_texts.append(text)
ocr_texts.append(text) ocr_lines.append(
conf = None {
try: "text": text,
conf = float(line[1][1]) "box": line.box,
except Exception: "conf": line.conf,
conf = None "source": "main",
ocr_lines.append( "roi_index": 0,
{ }
"text": text, )
"box": line[0],
"conf": conf,
"source": "main",
"roi_index": 0,
}
)
# 3. 结构化解析 # 3. 结构化解析
if ocr_texts: if ocr_texts:

217
src/ocr_engine.py Normal file
View File

@@ -0,0 +1,217 @@
from __future__ import annotations
import logging
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List, Optional
logger = logging.getLogger("post_ocr.ocr_engine")
@dataclass
class OCRLine:
    """One recognised text line in the backend-independent format.

    Every engine in this module converts its native result into a list of
    these records, so callers read ``text`` / ``box`` / ``conf`` without
    knowing which OCR backend produced them.
    """

    # Recognised text of the line.
    text: str
    # Location of the line as handed back by the backend; the exact shape is
    # backend-specific, hence ``Any``.
    box: Any
    # Recognition confidence, or None when the backend gave no usable score.
    conf: Optional[float] = None
class BaseOCREngine:
    """Common interface implemented by every OCR backend in this module."""

    # Short identifier of the backend; subclasses override it.
    backend_name: str = "unknown"

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run OCR on ``img`` and return the recognised lines.

        Subclasses must override this method; the base implementation only
        raises ``NotImplementedError``.
        """
        raise NotImplementedError
def _to_float(val: Any) -> Optional[float]:
try:
return float(val)
except Exception:
return None
class PaddleOCREngine(BaseOCREngine):
    """OCR backend that wraps the project's offline PaddleOCR instance."""

    backend_name = "paddle"

    def __init__(self, models_base_dir: Path):
        """Create the underlying OCR object via ``create_offline_ocr``.

        Args:
            models_base_dir: Directory passed through to ``create_offline_ocr``.
        """
        # Imported lazily so that merely importing this module does not pull
        # in the Paddle stack when another backend is selected.
        from ocr_offline import create_offline_ocr

        self._ocr = create_offline_ocr(models_base_dir=models_base_dir)

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run PaddleOCR on ``img`` and normalise the result.

        The code reads the result as ``result[0]`` being a list of
        ``[box, (text, confidence)]`` entries. Malformed entries and entries
        whose text is empty after stripping are skipped, matching what
        ``RapidOCREngine`` does.

        Returns:
            The recognised lines; an empty list when nothing was found.
        """
        result = self._ocr.ocr(img, cls=False)
        if not result or not result[0]:
            return []

        lines: List[OCRLine] = []
        for entry in result[0]:
            if not entry or len(entry) < 2:
                continue
            payload = entry[1]
            if not isinstance(payload, (list, tuple)) or not payload:
                continue
            # Strip here so whitespace-only detections are not reported as text.
            text = str(payload[0]).strip()
            if not text:
                continue
            conf = _to_float(payload[1]) if len(payload) >= 2 else None
            lines.append(OCRLine(text=text, box=entry[0], conf=conf))
        return lines
class RapidOCREngine(BaseOCREngine):
    """OCR backend built on the ``rapidocr_onnxruntime`` package.

    The raw result is normalised into ``OCRLine`` records. Several result
    layouts are accepted: a ``(result, elapse)`` tuple, a bare list of items,
    or an object exposing ``boxes`` / ``txts`` / ``scores`` attributes.
    """

    backend_name = "rapidocr"

    # Optional overrides: environment variable -> RapidOCR constructor keyword.
    # Lets users point the engine at locally prepared ONNX models / key files.
    _MODEL_ENV_KWARGS = (
        ("POST_OCR_RAPID_DET_MODEL", "det_model_path"),
        ("POST_OCR_RAPID_CLS_MODEL", "cls_model_path"),
        ("POST_OCR_RAPID_REC_MODEL", "rec_model_path"),
        ("POST_OCR_RAPID_KEYS_PATH", "rec_keys_path"),
    )

    def __init__(self, models_base_dir: Path):
        """Instantiate RapidOCR, applying any model-path overrides.

        Args:
            models_base_dir: Stored on the instance; not used to build the
                RapidOCR object itself.

        Raises:
            ImportError: If ``rapidocr_onnxruntime`` is not installed.
        """
        # Import by the official module name:
        # pip package "rapidocr-onnxruntime" -> module "rapidocr_onnxruntime".
        from rapidocr_onnxruntime import RapidOCR

        kwargs: dict[str, Any] = {}
        for env_name, kwarg_name in self._MODEL_ENV_KWARGS:
            value = os.environ.get(env_name, "").strip()
            if value:
                kwargs[kwarg_name] = value

        self._ocr = RapidOCR(**kwargs)
        self._models_base_dir = models_base_dir

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True for ``None`` or an empty sized container.

        Uses ``len`` instead of truth-testing: ``bool(ndarray)`` raises
        ``ValueError`` for arrays holding more than one element.
        """
        if value is None:
            return True
        try:
            return len(value) == 0
        except TypeError:
            # Unsized objects are treated as present.
            return False

    @classmethod
    def _as_list(cls, value: Any) -> list:
        """Convert ``value`` to a list, mapping missing/empty input to ``[]``."""
        return [] if cls._is_missing(value) else list(value)

    def _parse_result_item(self, item: Any) -> Optional[OCRLine]:
        """Convert one raw result item to an ``OCRLine``.

        Returns ``None`` when the item has an unknown layout or empty text.
        """
        if isinstance(item, dict):
            text = str(item.get("text") or item.get("txt") or "").strip()
            if not text:
                return None
            box = item.get("box")
            if self._is_missing(box):
                box = item.get("points")
            conf = _to_float(item.get("score", item.get("conf")))
            return OCRLine(text=text, box=box, conf=conf)

        if not isinstance(item, (list, tuple)) or len(item) < 2:
            return None

        body = item[1]

        # Common layout 1: [box, text, score]
        if isinstance(body, str):
            text = body.strip()
            if not text:
                return None
            conf = _to_float(item[2]) if len(item) >= 3 else None
            return OCRLine(text=text, box=item[0], conf=conf)

        # Common layout 2 (Paddle style): [box, (text, score)]
        if isinstance(body, (list, tuple)) and len(body) >= 1:
            text = str(body[0]).strip()
            if not text:
                return None
            conf = _to_float(body[1]) if len(body) >= 2 else None
            return OCRLine(text=text, box=item[0], conf=conf)

        return None

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run RapidOCR on ``img`` and return the recognised lines.

        Returns:
            Lines with non-empty text; an empty list when nothing was found
            or the result has an unrecognised layout.
        """
        # Usual return value: (ocr_res, elapse)
        raw = self._ocr(img)
        result = raw[0] if isinstance(raw, tuple) and len(raw) >= 1 else raw
        if result is None:
            return []

        lines: List[OCRLine] = []

        # Some versions return an object carrying boxes / txts / scores.
        if hasattr(result, "boxes") and hasattr(result, "txts"):
            boxes = self._as_list(getattr(result, "boxes"))
            txts = self._as_list(getattr(result, "txts"))
            scores = self._as_list(getattr(result, "scores", None))
            for idx, raw_text in enumerate(txts):
                text = str(raw_text).strip()
                if not text:
                    continue
                box = boxes[idx] if idx < len(boxes) else None
                conf = _to_float(scores[idx]) if idx < len(scores) else None
                lines.append(OCRLine(text=text, box=box, conf=conf))
            return lines

        if isinstance(result, (list, tuple)):
            for item in result:
                parsed = self._parse_result_item(item)
                if parsed is not None:
                    lines.append(parsed)
        return lines
def _announce_engine(engine: BaseOCREngine) -> BaseOCREngine:
    """Log which backend was selected and return ``engine`` unchanged."""
    logger.info("create_ocr_engine: using backend=%s", engine.backend_name)
    return engine


def create_ocr_engine(models_base_dir: Path) -> BaseOCREngine:
    """Create the OCR engine selected through environment variables.

    Environment variables:
        POST_OCR_BACKEND: ``rapidocr`` (alias ``onnx``) | ``paddle`` | ``auto``.
            Defaults to ``rapidocr``. Unknown values fall back to Paddle.
        POST_OCR_BACKEND_FALLBACK_PADDLE: ``1``/``0``. Whether a failed
            RapidOCR initialisation may fall back to Paddle. When unset, the
            default is off only if the user explicitly requested ``rapidocr``
            (so "rapidocr" never silently becomes Paddle) and on otherwise.
            An explicit value is honoured for ``auto`` as well.

    Args:
        models_base_dir: Directory handed to the engine constructors.

    Returns:
        The initialised engine.

    Raises:
        RuntimeError: RapidOCR failed to initialise and fallback is disabled.
    """
    backend_env = os.environ.get("POST_OCR_BACKEND")
    backend = (backend_env or "rapidocr").strip().lower() or "rapidocr"

    fallback_env = (os.environ.get("POST_OCR_BACKEND_FALLBACK_PADDLE") or "").strip().lower()
    if fallback_env:
        allow_fallback = fallback_env not in {"0", "false", "off", "no"}
    else:
        # Default rules:
        # 1) auto mode allows fallback;
        # 2) an explicit "rapidocr" request does not fall back silently;
        # 3) everything else stays compatible and allows fallback.
        explicit_rapidocr = backend == "rapidocr" and backend_env is not None
        allow_fallback = not explicit_rapidocr

    logger.info(
        "create_ocr_engine: request=%s explicit=%s fallback=%s python=%s",
        backend,
        backend_env is not None,
        allow_fallback,
        sys.executable,
    )

    if backend == "paddle":
        return _announce_engine(PaddleOCREngine(models_base_dir=models_base_dir))

    if backend in {"rapidocr", "onnx", "auto"}:
        try:
            engine = RapidOCREngine(models_base_dir=models_base_dir)
        except Exception as e:
            if not allow_fallback:
                logger.exception("create_ocr_engine: rapidocr 初始化失败")
                raise RuntimeError(
                    f"POST_OCR_BACKEND={backend} 初始化失败,且未启用回退。"
                    "请先安装 rapidocr-onnxruntime或设置 POST_OCR_BACKEND_FALLBACK_PADDLE=1。"
                ) from e
            if backend == "auto":
                logger.exception("create_ocr_engine: auto 模式 rapidocr 初始化失败,回退 paddle")
            else:
                logger.exception("create_ocr_engine: rapidocr 初始化失败")
                logger.warning("create_ocr_engine: 已回退到 paddle")
            # Built inside the handler so a Paddle failure keeps the RapidOCR
            # error as its exception context.
            return _announce_engine(PaddleOCREngine(models_base_dir=models_base_dir))
        return _announce_engine(engine)

    # Safety net for unknown values.
    logger.warning("create_ocr_engine: 未知后端 '%s',回退 paddle", backend)
    return _announce_engine(PaddleOCREngine(models_base_dir=models_base_dir))

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
# 必须在所有 paddle/numpy import 之前设置,否则 macOS spawn 子进程推理会死锁 # 必须在所有 paddle/numpy import 之前设置,否则 macOS spawn 子进程推理会死锁
import os import os
import logging
os.environ["OMP_NUM_THREADS"] = "1" os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1" os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1" os.environ["OPENBLAS_NUM_THREADS"] = "1"
@@ -13,9 +14,11 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
from ocr_offline import create_offline_ocr from ocr_engine import create_ocr_engine
from processor import extract_info from processor import extract_info
logger = logging.getLogger("post_ocr.ocr_worker")
def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None: def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
""" """
@@ -25,9 +28,10 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
""" """
try: try:
response_q.put({"type": "progress", "stage": "init_start"}) response_q.put({"type": "progress", "stage": "init_start"})
ocr = create_offline_ocr(models_base_dir=Path(models_base_dir)) engine = create_ocr_engine(models_base_dir=Path(models_base_dir))
response_q.put({"type": "ready"}) response_q.put({"type": "ready", "backend": getattr(engine, "backend_name", "unknown")})
except Exception as e: except Exception as e:
logger.exception("OCR 子进程初始化失败")
response_q.put({"type": "init_error", "error": str(e)}) response_q.put({"type": "init_error", "error": str(e)})
return return
@@ -58,31 +62,26 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
if img is None: if img is None:
continue continue
response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_start"}) response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_start"})
result = ocr.ocr(img, cls=False) lines = engine.infer_lines(img)
response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_done"}) response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_done"})
if result and result[0]: for line in lines:
for line in result[0]: text = str(line.text).strip()
if line and len(line) >= 2: if not text:
text = str(line[1][0]) continue
ocr_texts.append(text) ocr_texts.append(text)
conf = None # 将切片内的局部坐标还原为完整 ROI 坐标
try: box = line.box
conf = float(line[1][1]) if y_offset and isinstance(box, (list, tuple)):
except Exception: box = [[p[0], p[1] + y_offset] for p in box]
conf = None ocr_lines.append(
# 将切片内的局部坐标还原为完整 ROI 坐标 {
box = line[0] "text": text,
if y_offset and isinstance(box, (list, tuple)): "box": box,
box = [[p[0], p[1] + y_offset] for p in box] "conf": line.conf,
ocr_lines.append( "source": source,
{ "roi_index": roi_index,
"text": text, }
"box": box, )
"conf": conf,
"source": source,
"roi_index": roi_index,
}
)
record = extract_info(ocr_lines if ocr_lines else ocr_texts) record = extract_info(ocr_lines if ocr_lines else ocr_texts)
response_q.put({"type": "progress", "job_id": int(job_id), "stage": "parse_done", "texts": len(ocr_texts)}) response_q.put({"type": "progress", "job_id": int(job_id), "stage": "parse_done", "texts": len(ocr_texts)})
@@ -95,4 +94,5 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
} }
) )
except Exception as e: except Exception as e:
logger.exception("OCR 子进程处理任务失败 job=%s", job_id)
response_q.put({"type": "error", "job_id": int(job_id), "error": str(e)}) response_q.put({"type": "error", "job_id": int(job_id), "error": str(e)})