Compare commits
1 Commits
main
...
codex/rapi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b68612dd53 |
30
README.md
30
README.md
@@ -28,6 +28,8 @@
|
||||
sudo apt-get install -y libgl1-mesa-glx libglib2.0-0
|
||||
|
||||
# 安装 Python 依赖
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
@@ -36,18 +38,42 @@ pip install -r requirements.txt
|
||||
**命令行批处理**
|
||||
```bash
|
||||
# 将图片放入 data/input/ 目录
|
||||
python src/main.py
|
||||
.venv/bin/python src/main.py
|
||||
|
||||
# 结果保存在 data/output/result.xlsx
|
||||
```
|
||||
|
||||
**桌面应用**
|
||||
```bash
|
||||
python src/desktop.py
|
||||
.venv/bin/python src/desktop.py
|
||||
|
||||
# 启动 PyQt6 窗口,可选择摄像头实时拍照识别
|
||||
```
|
||||
|
||||
### 3. OCR 后端切换(RapidOCR / PaddleOCR)
|
||||
|
||||
默认后端为 **RapidOCR(ONNX)**,可通过环境变量切换:
|
||||
|
||||
```bash
|
||||
# 默认:RapidOCR(推荐,跨平台更稳)
|
||||
POST_OCR_BACKEND=rapidocr .venv/bin/python src/desktop.py
|
||||
|
||||
# 强制使用 PaddleOCR
|
||||
POST_OCR_BACKEND=paddle .venv/bin/python src/desktop.py
|
||||
|
||||
# 自动:优先 RapidOCR,失败回退 PaddleOCR
|
||||
POST_OCR_BACKEND=auto .venv/bin/python src/desktop.py
|
||||
```
|
||||
|
||||
常用相关环境变量:
|
||||
- `POST_OCR_BACKEND_FALLBACK_PADDLE=1|0`:是否允许回退到 Paddle(默认:
|
||||
- `POST_OCR_BACKEND=auto` 时为 `1`
|
||||
- 用户显式 `POST_OCR_BACKEND=rapidocr` 时为 `0`;未设置 `POST_OCR_BACKEND`(按默认 rapidocr 运行)时为 `1`)
|
||||
- `POST_OCR_MP_START_METHOD=spawn|fork`:强制指定 OCR 子进程启动方式(macOS 默认:rapidocr 用 `spawn`,paddle 用 `fork`)
|
||||
- `POST_OCR_MAIN_SPLIT=1~4`:主 ROI 分片数(默认 2)
|
||||
- `POST_OCR_MAX_ROI_WIDTH=600+`:识别前缩放宽度上限(默认 960)
|
||||
- `POST_OCR_JOB_TIMEOUT_SEC`:单次识别超时秒数(默认 25)
|
||||
|
||||
---
|
||||
|
||||
## Windows 桌面离线版(zip 目录包)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# 桌面版依赖(本地电脑安装)
|
||||
# ⚠️ PaddleOCR 3.x 有 PIR+oneDNN 兼容性问题,必须使用 2.x
|
||||
rapidocr-onnxruntime
|
||||
paddleocr==2.10.0
|
||||
paddlepaddle==2.6.2
|
||||
|
||||
|
||||
@@ -89,15 +89,22 @@ class OCRService(QObject):
|
||||
super().__init__()
|
||||
self._models_base_dir = models_base_dir
|
||||
self._busy = False
|
||||
self.backend_name = "unknown"
|
||||
self._stop_event = threading.Event()
|
||||
method_default = "fork" if sys.platform == "darwin" else "spawn"
|
||||
backend_req = os.environ.get("POST_OCR_BACKEND", "rapidocr").strip().lower() or "rapidocr"
|
||||
if sys.platform == "darwin":
|
||||
# macOS + PyQt/OpenCV 场景下 fork 对 ONNX 推理稳定性较差,rapidocr 默认走 spawn。
|
||||
# Paddle 在 macOS 历史上与 spawn 组合更容易出现卡住,因此保留 fork。
|
||||
method_default = "fork" if backend_req == "paddle" else "spawn"
|
||||
else:
|
||||
method_default = "spawn"
|
||||
method = os.environ.get("POST_OCR_MP_START_METHOD", method_default).strip() or method_default
|
||||
try:
|
||||
self._ctx = mp.get_context(method)
|
||||
except ValueError:
|
||||
method = method_default
|
||||
self._ctx = mp.get_context(method_default)
|
||||
logger.info("OCR multiprocessing start_method=%s", method)
|
||||
logger.info("OCR multiprocessing start_method=%s (backend_req=%s)", method, backend_req)
|
||||
self._req_q = None
|
||||
self._resp_q = None
|
||||
self._proc = None
|
||||
@@ -189,7 +196,12 @@ class OCRService(QObject):
|
||||
logger.info("OCR 子进程进度 job=%s stage=%s%s", job_id, stage, suffix)
|
||||
continue
|
||||
if msg_type == "ready":
|
||||
logger.info("OCR 子进程已就绪 pid=%s", getattr(self._proc, "pid", None))
|
||||
self.backend_name = str(msg.get("backend", "unknown"))
|
||||
logger.info(
|
||||
"OCR 子进程已就绪 pid=%s backend=%s",
|
||||
getattr(self._proc, "pid", None),
|
||||
self.backend_name,
|
||||
)
|
||||
self.ready.emit()
|
||||
continue
|
||||
if msg_type == "init_error":
|
||||
@@ -448,11 +460,16 @@ class MainWindow(QMainWindow):
|
||||
def _on_ocr_ready(self) -> None:
|
||||
try:
|
||||
self._ocr_ready = True
|
||||
self.statusBar().showMessage("OCR 模型已加载(离线)")
|
||||
backend = "unknown"
|
||||
try:
|
||||
backend = str(getattr(self._ocr_service, "backend_name", "unknown"))
|
||||
except Exception:
|
||||
backend = "unknown"
|
||||
self.statusBar().showMessage(f"OCR 模型已加载({backend})")
|
||||
btn = getattr(self, "btn_capture", None)
|
||||
if btn is not None:
|
||||
btn.setEnabled(self.cap is not None and not self._ocr_busy)
|
||||
logger.info("OCR ready")
|
||||
logger.info("OCR ready backend=%s", backend)
|
||||
except Exception as e:
|
||||
logger.exception("处理 OCR ready 回调失败:%s", str(e))
|
||||
|
||||
|
||||
51
src/main.py
51
src/main.py
@@ -1,8 +1,10 @@
|
||||
import os
|
||||
import glob
|
||||
import cv2
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from paddleocr import PaddleOCR
|
||||
from pathlib import Path
|
||||
from ocr_engine import create_ocr_engine
|
||||
from processor import extract_info, save_to_excel
|
||||
|
||||
# 禁用联网检查,加快启动速度
|
||||
@@ -10,8 +12,9 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
|
||||
|
||||
|
||||
def main():
|
||||
# 初始化 PaddleOCR
|
||||
ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
|
||||
# 初始化 OCR 引擎(默认 rapidocr,可通过环境变量切换)
|
||||
models_dir = Path("models")
|
||||
ocr_engine = create_ocr_engine(models_base_dir=models_dir)
|
||||
|
||||
input_dir = "data/input"
|
||||
output_dir = "data/output"
|
||||
@@ -36,31 +39,31 @@ def main():
|
||||
for img_path in tqdm(image_paths):
|
||||
try:
|
||||
# 1. 执行 OCR 识别
|
||||
result = ocr.ocr(img_path, cls=False)
|
||||
img = cv2.imread(img_path)
|
||||
if img is None:
|
||||
errors.append(
|
||||
{"file": os.path.basename(img_path), "error": "图片读取失败"}
|
||||
)
|
||||
continue
|
||||
lines = ocr_engine.infer_lines(img)
|
||||
|
||||
# 2. 提取文字行
|
||||
ocr_texts = []
|
||||
ocr_lines = []
|
||||
if result and result[0]:
|
||||
for line in result[0]:
|
||||
# line 格式: [box, (text, confidence)]
|
||||
if line and len(line) >= 2:
|
||||
text = str(line[1][0])
|
||||
ocr_texts.append(text)
|
||||
conf = None
|
||||
try:
|
||||
conf = float(line[1][1])
|
||||
except Exception:
|
||||
conf = None
|
||||
ocr_lines.append(
|
||||
{
|
||||
"text": text,
|
||||
"box": line[0],
|
||||
"conf": conf,
|
||||
"source": "main",
|
||||
"roi_index": 0,
|
||||
}
|
||||
)
|
||||
for line in lines:
|
||||
text = str(line.text).strip()
|
||||
if not text:
|
||||
continue
|
||||
ocr_texts.append(text)
|
||||
ocr_lines.append(
|
||||
{
|
||||
"text": text,
|
||||
"box": line.box,
|
||||
"conf": line.conf,
|
||||
"source": "main",
|
||||
"roi_index": 0,
|
||||
}
|
||||
)
|
||||
|
||||
# 3. 结构化解析
|
||||
if ocr_texts:
|
||||
|
||||
217
src/ocr_engine.py
Normal file
217
src/ocr_engine.py
Normal file
@@ -0,0 +1,217 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional
|
||||
|
||||
logger = logging.getLogger("post_ocr.ocr_engine")
|
||||
|
||||
|
||||
@dataclass
class OCRLine:
    """A single recognized text line in a backend-independent shape."""

    # Recognized text content.
    text: str
    # Location of the line as reported by the backend (format is backend-specific).
    box: Any
    # Recognition confidence; None when the backend supplied no usable value.
    conf: Optional[float] = None
|
||||
|
||||
|
||||
class BaseOCREngine:
    """Common interface shared by the OCR backend wrappers in this module."""

    # Short identifier of the backend; concrete engines override this value.
    backend_name: str = "unknown"

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run OCR on ``img`` and return the recognized lines.

        The base implementation is a placeholder and always raises
        ``NotImplementedError``; subclasses provide the real behavior.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
def _to_float(val: Any) -> Optional[float]:
|
||||
try:
|
||||
return float(val)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
class PaddleOCREngine(BaseOCREngine):
    """OCR engine that delegates to the object built by ``create_offline_ocr``."""

    backend_name = "paddle"

    def __init__(self, models_base_dir: Path):
        """Build the underlying OCR object for the given models directory."""
        # Imported inside the constructor, so ``ocr_offline`` is only loaded
        # when a Paddle engine is actually created.
        from ocr_offline import create_offline_ocr

        self._ocr = create_offline_ocr(models_base_dir=models_base_dir)

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run OCR on ``img`` and convert the result into ``OCRLine`` objects.

        Only the first element of the returned result is read; each entry is
        expected to look like ``[box, (text, confidence)]``. Entries that are
        too short, whose second element is not a non-empty list/tuple, or
        whose text is empty are skipped.
        """
        result = self._ocr.ocr(img, cls=False)
        if not result or not result[0]:
            return []

        parsed: List[OCRLine] = []
        for entry in result[0]:
            if not entry or len(entry) < 2:
                continue
            payload = entry[1]
            if not isinstance(payload, (list, tuple)) or not payload:
                continue
            text = str(payload[0])
            if not text:
                continue
            # Confidence is optional: only present when the payload has a second slot.
            conf = _to_float(payload[1]) if len(payload) >= 2 else None
            parsed.append(OCRLine(text=text, box=entry[0], conf=conf))
        return parsed
|
||||
|
||||
|
||||
class RapidOCREngine(BaseOCREngine):
    """OCR engine backed by ``rapidocr_onnxruntime.RapidOCR``.

    Results are normalized into ``OCRLine`` objects. Several result layouts
    are accepted (see ``infer_lines``) because the layout differs between
    RapidOCR releases.
    """

    backend_name = "rapidocr"

    # (environment variable, RapidOCR keyword argument) pairs that let the user
    # point the engine at locally prepared ONNX models / key files.
    _MODEL_ENV_KWARGS = (
        ("POST_OCR_RAPID_DET_MODEL", "det_model_path"),
        ("POST_OCR_RAPID_CLS_MODEL", "cls_model_path"),
        ("POST_OCR_RAPID_REC_MODEL", "rec_model_path"),
        ("POST_OCR_RAPID_KEYS_PATH", "rec_keys_path"),
    )

    def __init__(self, models_base_dir: Path):
        """Create the RapidOCR instance, honoring optional model-path overrides.

        Raises:
            ImportError: if ``rapidocr_onnxruntime`` is not installed.
        """
        # Package ``rapidocr-onnxruntime`` is imported as ``rapidocr_onnxruntime``.
        from rapidocr_onnxruntime import RapidOCR

        kwargs: dict[str, Any] = {}
        for env_name, kwarg_name in self._MODEL_ENV_KWARGS:
            value = os.environ.get(env_name, "").strip()
            if value:
                kwargs[kwarg_name] = value

        self._ocr = RapidOCR(**kwargs)
        # Stored for reference only; nothing in this class reads it back.
        self._models_base_dir = models_base_dir

    @staticmethod
    def _as_list(value: Any) -> list:
        """Return ``value`` as a list, mapping ``None`` to an empty list.

        An explicit ``is None`` test is used instead of ``value or []``:
        truthiness of array-like values (e.g. NumPy arrays with more than one
        element) is ambiguous and raises ``ValueError``.
        """
        return [] if value is None else list(value)

    @staticmethod
    def _normalize_box(box: Any) -> Any:
        """Convert array-like boxes to plain nested lists; pass others through.

        Callers in this project test ``isinstance(box, (list, tuple))`` before
        shifting coordinates, so an array-like box would silently skip that
        step if left as-is.
        """
        to_list = getattr(box, "tolist", None)
        return to_list() if callable(to_list) else box

    def _parse_result_item(self, item: Any) -> Optional[OCRLine]:
        """Parse one result entry into an ``OCRLine``; return ``None`` if unusable.

        Accepted shapes:
        - dict with ``text``/``txt``, ``box``/``points`` and ``score``/``conf``
        - ``[box, text, score]`` (score optional)
        - Paddle style ``[box, (text, score)]`` (score optional)
        """
        if isinstance(item, dict):
            text = str(item.get("text") or item.get("txt") or "").strip()
            if not text:
                return None
            # ``is None`` rather than ``or``: the box may be array-like.
            box = item.get("box")
            if box is None:
                box = item.get("points")
            conf = _to_float(item.get("score", item.get("conf")))
            return OCRLine(text=text, box=self._normalize_box(box), conf=conf)

        if not isinstance(item, (list, tuple)) or len(item) < 2:
            return None

        payload = item[1]

        # Common layout 1: [box, text, score]
        if isinstance(payload, str):
            text = payload.strip()
            if not text:
                return None
            conf = _to_float(item[2]) if len(item) >= 3 else None
            return OCRLine(text=text, box=self._normalize_box(item[0]), conf=conf)

        # Common layout 2 (Paddle style): [box, (text, score)]
        if isinstance(payload, (list, tuple)) and len(payload) >= 1:
            text = str(payload[0]).strip()
            if not text:
                return None
            conf = _to_float(payload[1]) if len(payload) >= 2 else None
            return OCRLine(text=text, box=self._normalize_box(item[0]), conf=conf)

        return None

    def _lines_from_result_object(self, result: Any) -> List[OCRLine]:
        """Build lines from a result object exposing ``boxes``/``txts``/``scores``.

        The three attributes are read as parallel sequences; a missing or
        ``None`` attribute is treated as empty, and a missing box/score for a
        given text becomes ``None``.
        """
        boxes = self._as_list(getattr(result, "boxes", None))
        txts = self._as_list(getattr(result, "txts", None))
        scores = self._as_list(getattr(result, "scores", None))

        lines: List[OCRLine] = []
        for idx, raw_text in enumerate(txts):
            text = str(raw_text).strip()
            if not text:
                continue
            box = self._normalize_box(boxes[idx]) if idx < len(boxes) else None
            conf = _to_float(scores[idx]) if idx < len(scores) else None
            lines.append(OCRLine(text=text, box=box, conf=conf))
        return lines

    def infer_lines(self, img: Any) -> List[OCRLine]:
        """Run RapidOCR on ``img`` and return the recognized, non-empty lines.

        The call result is commonly a ``(ocr_result, elapse)`` tuple, in which
        case only the first element is used. The result may then be ``None``
        (nothing recognized), an object with ``boxes``/``txts`` attributes, or
        a list/tuple of per-line entries. Anything else yields an empty list.
        """
        raw = self._ocr(img)
        result = raw[0] if isinstance(raw, tuple) and len(raw) >= 1 else raw
        if result is None:
            return []

        # Object-style result (boxes/txts/scores attributes).
        if hasattr(result, "boxes") and hasattr(result, "txts"):
            return self._lines_from_result_object(result)

        lines: List[OCRLine] = []
        if isinstance(result, (list, tuple)):
            for item in result:
                parsed = self._parse_result_item(item)
                if parsed is not None:
                    lines.append(parsed)
        return lines
|
||||
|
||||
|
||||
def create_ocr_engine(models_base_dir: Path) -> BaseOCREngine:
    """Create the OCR engine selected through environment variables.

    Environment variables:
    - POST_OCR_BACKEND: ``rapidocr`` (alias ``onnx``) | ``paddle`` | ``auto``.
      Unset or blank means ``rapidocr``. Any other value logs a warning and
      uses Paddle.
    - POST_OCR_BACKEND_FALLBACK_PADDLE: ``0``/``false``/``off``/``no`` disables
      the fallback to Paddle when RapidOCR fails to initialize; any other
      non-blank value enables it. When unset or blank, fallback is disabled
      only if the user explicitly requested ``rapidocr``/``onnx`` (so an
      explicit request is never silently served by Paddle) and enabled
      otherwise.

    The fallback switch applies to both the ``rapidocr`` and ``auto`` modes.

    Args:
        models_base_dir: Directory passed through to the engine constructors.

    Returns:
        The initialized engine.

    Raises:
        RuntimeError: RapidOCR failed to initialize and fallback is disabled.
    """
    rapid_aliases = {"rapidocr", "onnx"}
    disabled_values = {"0", "false", "off", "no"}

    # A blank value is treated like an unset variable, not as an explicit choice.
    requested = (os.environ.get("POST_OCR_BACKEND") or "").strip().lower()
    explicit = bool(requested)
    backend = requested or "rapidocr"

    fallback_raw = (os.environ.get("POST_OCR_BACKEND_FALLBACK_PADDLE") or "").strip().lower()
    if fallback_raw:
        allow_fallback = fallback_raw not in disabled_values
    else:
        # Default: never silently swap an explicitly requested RapidOCR for Paddle.
        allow_fallback = not (explicit and backend in rapid_aliases)

    logger.info(
        "create_ocr_engine: request=%s explicit=%s fallback=%s python=%s",
        backend,
        explicit,
        allow_fallback,
        sys.executable,
    )

    def _announce(engine: BaseOCREngine) -> BaseOCREngine:
        # Single place that reports which backend ended up being used.
        logger.info("create_ocr_engine: using backend=%s", engine.backend_name)
        return engine

    def _paddle() -> BaseOCREngine:
        return _announce(PaddleOCREngine(models_base_dir=models_base_dir))

    if backend in rapid_aliases or backend == "auto":
        try:
            engine = RapidOCREngine(models_base_dir=models_base_dir)
        except Exception as e:
            # Broad on purpose: import errors and model-loading errors both land here.
            logger.exception("create_ocr_engine: rapidocr 初始化失败")
            if not allow_fallback:
                raise RuntimeError(
                    f"POST_OCR_BACKEND={backend} 初始化失败,且未启用回退。"
                    "请先安装 rapidocr-onnxruntime,或设置 POST_OCR_BACKEND_FALLBACK_PADDLE=1。"
                ) from e
            logger.warning("create_ocr_engine: 已回退到 paddle")
            return _paddle()
        return _announce(engine)

    if backend != "paddle":
        # Unknown value: keep the historical behavior of using Paddle.
        logger.warning("create_ocr_engine: 未知后端 '%s',回退 paddle", backend)
    return _paddle()
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
# 必须在所有 paddle/numpy import 之前设置,否则 macOS spawn 子进程推理会死锁
|
||||
import os
|
||||
import logging
|
||||
os.environ["OMP_NUM_THREADS"] = "1"
|
||||
os.environ["MKL_NUM_THREADS"] = "1"
|
||||
os.environ["OPENBLAS_NUM_THREADS"] = "1"
|
||||
@@ -13,9 +14,11 @@ os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from ocr_offline import create_offline_ocr
|
||||
from ocr_engine import create_ocr_engine
|
||||
from processor import extract_info
|
||||
|
||||
logger = logging.getLogger("post_ocr.ocr_worker")
|
||||
|
||||
|
||||
def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
|
||||
"""
|
||||
@@ -25,9 +28,10 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
|
||||
"""
|
||||
try:
|
||||
response_q.put({"type": "progress", "stage": "init_start"})
|
||||
ocr = create_offline_ocr(models_base_dir=Path(models_base_dir))
|
||||
response_q.put({"type": "ready"})
|
||||
engine = create_ocr_engine(models_base_dir=Path(models_base_dir))
|
||||
response_q.put({"type": "ready", "backend": getattr(engine, "backend_name", "unknown")})
|
||||
except Exception as e:
|
||||
logger.exception("OCR 子进程初始化失败")
|
||||
response_q.put({"type": "init_error", "error": str(e)})
|
||||
return
|
||||
|
||||
@@ -58,31 +62,26 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
|
||||
if img is None:
|
||||
continue
|
||||
response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_start"})
|
||||
result = ocr.ocr(img, cls=False)
|
||||
lines = engine.infer_lines(img)
|
||||
response_q.put({"type": "progress", "job_id": int(job_id), "stage": f"roi_{roi_index}_done"})
|
||||
if result and result[0]:
|
||||
for line in result[0]:
|
||||
if line and len(line) >= 2:
|
||||
text = str(line[1][0])
|
||||
ocr_texts.append(text)
|
||||
conf = None
|
||||
try:
|
||||
conf = float(line[1][1])
|
||||
except Exception:
|
||||
conf = None
|
||||
# 将切片内的局部坐标还原为完整 ROI 坐标
|
||||
box = line[0]
|
||||
if y_offset and isinstance(box, (list, tuple)):
|
||||
box = [[p[0], p[1] + y_offset] for p in box]
|
||||
ocr_lines.append(
|
||||
{
|
||||
"text": text,
|
||||
"box": box,
|
||||
"conf": conf,
|
||||
"source": source,
|
||||
"roi_index": roi_index,
|
||||
}
|
||||
)
|
||||
for line in lines:
|
||||
text = str(line.text).strip()
|
||||
if not text:
|
||||
continue
|
||||
ocr_texts.append(text)
|
||||
# 将切片内的局部坐标还原为完整 ROI 坐标
|
||||
box = line.box
|
||||
if y_offset and isinstance(box, (list, tuple)):
|
||||
box = [[p[0], p[1] + y_offset] for p in box]
|
||||
ocr_lines.append(
|
||||
{
|
||||
"text": text,
|
||||
"box": box,
|
||||
"conf": line.conf,
|
||||
"source": source,
|
||||
"roi_index": roi_index,
|
||||
}
|
||||
)
|
||||
|
||||
record = extract_info(ocr_lines if ocr_lines else ocr_texts)
|
||||
response_q.put({"type": "progress", "job_id": int(job_id), "stage": "parse_done", "texts": len(ocr_texts)})
|
||||
@@ -95,4 +94,5 @@ def run_ocr_worker(models_base_dir: str, request_q, response_q) -> None:
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.exception("OCR 子进程处理任务失败 job=%s", job_id)
|
||||
response_q.put({"type": "error", "job_id": int(job_id), "error": str(e)})
|
||||
|
||||
Reference in New Issue
Block a user