fix: 解决打包后三个运行时问题
- rthook_paddle.py: stub 掉 paddle.utils.cpp_extension,避免因缺少 Cython 文件而崩溃
- build_exe.py: 显式收集 paddle DLLs(mklml.dll 等)
- ocr_offline.py: 模型路径含非 ASCII 字符时自动复制模型到临时目录,绕过 PaddlePaddle C++ 的路径限制

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
├── _internal/ (运行时依赖)
|
||||
└── models/ (OCR 模型,需提前通过 prepare_models.py 准备)
|
||||
"""
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import shutil
|
||||
@@ -16,6 +17,10 @@ from pathlib import Path
|
||||
PROJECT_ROOT = Path(__file__).parent
|
||||
DIST_NAME = "信封信息提取系统"
|
||||
|
||||
# Directory that holds paddle's DLLs (mklml.dll and similar files are not
# collected by PyInstaller automatically).
import paddle as _paddle

# "libs" sits next to paddle's __init__.py inside the installed package.
PADDLE_LIBS = str(Path(_paddle.__file__).with_name("libs"))
|
||||
|
||||
|
||||
def build(debug=False):
|
||||
"""使用 PyInstaller 打包(onedir 模式)"""
|
||||
@@ -39,6 +44,10 @@ def build(debug=False):
|
||||
"--hidden-import=ocr_offline",
|
||||
"--hidden-import=paddleocr",
|
||||
"--hidden-import=paddle",
|
||||
# --- paddle DLLs(mklml.dll 等不会被自动收集) ---
|
||||
f"--add-binary={PADDLE_LIBS}/*.dll{os.pathsep}paddle/libs",
|
||||
# --- runtime hook: stub 掉 paddle 开发模块,避免 Cython 缺文件崩溃 ---
|
||||
"--runtime-hook=rthook_paddle.py",
|
||||
# --- 收集 paddleocr 全部数据(模型配置、字典等) ---
|
||||
"--collect-all=paddleocr",
|
||||
# --- 元数据(部分库在运行时通过 importlib.metadata 查版本) ---
|
||||
|
||||
33
rthook_paddle.py
Normal file
33
rthook_paddle.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""
|
||||
PyInstaller runtime hook: stub 掉 paddle 中仅开发时需要的模块,
|
||||
避免打包后因缺少 Cython Utility 文件而崩溃。
|
||||
"""
|
||||
import types
|
||||
import sys
|
||||
|
||||
|
||||
class _Stub(types.ModuleType):
    """Empty placeholder module whose public attributes resolve to empty classes.

    Any attribute whose name does not start with an underscore is answered
    with a freshly created, empty class named after the attribute. The class
    is cached on the module, so repeated lookups return the very same object
    and identity / ``isinstance`` checks stay consistent across import sites.
    """

    def __getattr__(self, name):
        """Return (and cache) an empty placeholder class for *name*.

        Raises:
            AttributeError: if *name* starts with an underscore, so that
                probes for private or dunder attributes fail exactly as they
                would on a module that does not define them.
        """
        if name.startswith("_"):
            raise AttributeError(name)
        placeholder = type(name, (), {})
        # __getattr__ only runs when normal lookup fails; storing the class on
        # the instance makes every later access hit the cached object.
        setattr(self, name, placeholder)
        return placeholder
|
||||
|
||||
|
||||
def _inject(name):
    """Register an empty stub package under *name* in ``sys.modules``.

    Does nothing when a module with that name is already registered. The stub
    gets an empty ``__path__`` (so it is treated as a package) and a real,
    loader-less ``ModuleSpec`` instead of ``None``: ``importlib.util.find_spec``
    raises ``ValueError`` for a registered module whose ``__spec__`` is
    ``None``.
    """
    if name in sys.modules:
        return

    # Local import keeps this block self-contained within the runtime hook.
    from importlib.machinery import ModuleSpec

    stub = _Stub(name)
    stub.__path__ = []
    stub.__package__ = name
    stub.__spec__ = ModuleSpec(name, loader=None, is_package=True)
    sys.modules[name] = stub
|
||||
|
||||
|
||||
# paddle.utils.cpp_extension drags in the Cython compiler, which inference
# does not need: stub the package itself and then its submodules.
for _p in (
    "paddle.utils.cpp_extension" + _suffix
    for _suffix in ("", ".cpp_extension", ".extension_utils", ".jit_compile")
):
    _inject(_p)
|
||||
@@ -11,10 +11,43 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
|
||||
def _model_file_sizes(root: Path) -> dict:
    """Map every file under *root* (as a relative POSIX path) to its size in bytes."""
    return {
        entry.relative_to(root).as_posix(): entry.stat().st_size
        for entry in root.rglob("*")
        if entry.is_file()
    }


def _ascii_cache_base() -> Path:
    """Choose the parent directory of the ASCII-only model cache.

    The system temp directory is preferred. If its path contains non-ASCII
    characters, the first ASCII location among ``%PROGRAMDATA%`` and
    ``%PUBLIC%`` (when set) is used instead. If none qualifies, the temp
    directory is returned anyway and the caller reports the problem.
    """
    candidates = [tempfile.gettempdir()]
    candidates.extend(
        os.environ[var] for var in ("PROGRAMDATA", "PUBLIC") if os.environ.get(var)
    )
    for candidate in candidates:
        if candidate.isascii():
            return Path(candidate) / "post_ocr_models"
    return Path(candidates[0]) / "post_ocr_models"


def _ensure_ascii_model_dir(model_dir: Path) -> str:
    """Return a model directory path that contains only ASCII characters.

    The PaddlePaddle C++ inference engine does not support non-ASCII paths on
    Windows. A pure-ASCII *model_dir* is returned unchanged. Otherwise the
    directory is mirrored to ``<cache base>/post_ocr_models/<dir name>`` and
    that copy is returned; an existing copy is reused only when its file list
    and file sizes match the source.

    Args:
        model_dir: Directory holding the inference model files.

    Returns:
        The path to pass to PaddlePaddle, as a string.

    Raises:
        OSError: if the model directory cannot be copied.
    """
    model_dir = Path(model_dir)
    source = str(model_dir)
    if source.isascii():
        return source  # already safe, use as is

    log = logging.getLogger("post_ocr.ocr")
    safe_dir = _ascii_cache_base() / model_dir.name
    if not str(safe_dir).isascii():
        # Nothing better is available; make the likely load failure diagnosable.
        log.warning("缓存路径仍含非ASCII字符,PaddlePaddle 可能无法加载模型: %s", safe_dir)

    # Compare every file, not a single marker: a copy that was interrupted
    # half-way must not be mistaken for a valid cache.
    if safe_dir.is_dir() and _model_file_sizes(safe_dir) == _model_file_sizes(model_dir):
        return str(safe_dir)

    log.info("模型路径含非ASCII字符,复制到: %s", safe_dir)
    # Copy into a sibling staging directory and swap it in afterwards, so the
    # final location never holds a partially written model.
    staging = safe_dir.with_name(f"{safe_dir.name}.tmp{os.getpid()}")
    shutil.rmtree(staging, ignore_errors=True)
    try:
        shutil.copytree(model_dir, staging)
        if safe_dir.exists():
            shutil.rmtree(safe_dir)
        staging.replace(safe_dir)
    finally:
        # No-op after a successful swap; removes leftovers after a failure.
        shutil.rmtree(staging, ignore_errors=True)
    return str(safe_dir)
|
||||
|
||||
|
||||
def _is_frozen() -> bool:
    """Report whether the program runs from a PyInstaller-built bundle.

    Reads the ``frozen`` attribute of ``sys`` and treats a missing attribute
    as not frozen.
    """
    frozen_flag = getattr(sys, "frozen", False)
    return True if frozen_flag else False
|
||||
@@ -94,8 +127,9 @@ def create_offline_ocr(models_base_dir: Path | None = None):
|
||||
|
||||
if (det_dir / "inference.pdmodel").exists() and (rec_dir / "inference.pdmodel").exists():
|
||||
log.info("使用离线模型: %s", models_dir)
|
||||
kwargs["det_model_dir"] = str(det_dir)
|
||||
kwargs["rec_model_dir"] = str(rec_dir)
|
||||
kwargs["det_model_dir"] = _ensure_ascii_model_dir(det_dir)
|
||||
kwargs["rec_model_dir"] = _ensure_ascii_model_dir(rec_dir)
|
||||
log.info("det_model_dir=%s, rec_model_dir=%s", kwargs["det_model_dir"], kwargs["rec_model_dir"])
|
||||
else:
|
||||
log.info("未找到离线模型,将使用默认路径(可能需要联网下载)")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user