fix: 解决打包后三个运行时问题
- rthook_paddle.py: stub paddle.utils.cpp_extension,避免 Cython 缺文件崩溃
- build_exe.py: 显式收集 paddle DLLs(mklml.dll 等)
- ocr_offline.py: 非 ASCII 路径自动复制模型到临时目录,绕过 PaddlePaddle C++ 路径限制

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,7 @@
|
|||||||
├── _internal/ (运行时依赖)
|
├── _internal/ (运行时依赖)
|
||||||
└── models/ (OCR 模型,需提前通过 prepare_models.py 准备)
|
└── models/ (OCR 模型,需提前通过 prepare_models.py 准备)
|
||||||
"""
|
"""
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
@@ -16,6 +17,10 @@ from pathlib import Path
|
|||||||
# Directory containing this build script; other build paths are derived from it.
PROJECT_ROOT = Path(__file__).parent
# Name used for the packaged distribution.
DIST_NAME = "信封信息提取系统"

# Directory holding paddle's native DLLs (mklml.dll and friends). PyInstaller
# does not collect these automatically, so the build passes them explicitly.
import paddle as _paddle
PADDLE_LIBS = str(Path(_paddle.__file__).parent / "libs")
|
||||||
|
|
||||||
|
|
||||||
def build(debug=False):
|
def build(debug=False):
|
||||||
"""使用 PyInstaller 打包(onedir 模式)"""
|
"""使用 PyInstaller 打包(onedir 模式)"""
|
||||||
@@ -39,6 +44,10 @@ def build(debug=False):
|
|||||||
"--hidden-import=ocr_offline",
|
"--hidden-import=ocr_offline",
|
||||||
"--hidden-import=paddleocr",
|
"--hidden-import=paddleocr",
|
||||||
"--hidden-import=paddle",
|
"--hidden-import=paddle",
|
||||||
|
# --- paddle DLLs(mklml.dll 等不会被自动收集) ---
|
||||||
|
f"--add-binary={PADDLE_LIBS}/*.dll{os.pathsep}paddle/libs",
|
||||||
|
# --- runtime hook: stub 掉 paddle 开发模块,避免 Cython 缺文件崩溃 ---
|
||||||
|
"--runtime-hook=rthook_paddle.py",
|
||||||
# --- 收集 paddleocr 全部数据(模型配置、字典等) ---
|
# --- 收集 paddleocr 全部数据(模型配置、字典等) ---
|
||||||
"--collect-all=paddleocr",
|
"--collect-all=paddleocr",
|
||||||
# --- 元数据(部分库在运行时通过 importlib.metadata 查版本) ---
|
# --- 元数据(部分库在运行时通过 importlib.metadata 查版本) ---
|
||||||
|
|||||||
33
rthook_paddle.py
Normal file
33
rthook_paddle.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
"""
|
||||||
|
PyInstaller runtime hook: stub 掉 paddle 中仅开发时需要的模块,
|
||||||
|
避免打包后因缺少 Cython Utility 文件而崩溃。
|
||||||
|
"""
|
||||||
|
import types
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
class _Stub(types.ModuleType):
    """Placeholder module whose public attributes resolve to empty classes.

    Any public name looked up on the stub yields an empty class named after
    the attribute, so ``from <stub> import Anything`` succeeds. The class is
    created on first access and cached on the module, so every importer sees
    the same object (``stub.X is stub.X``).
    """

    def __getattr__(self, name):
        # Only invoked when normal attribute lookup fails. Names starting with
        # an underscore keep raising AttributeError so that hasattr()/getattr()
        # probes for private or dunder attributes behave like on a plain module.
        if name.startswith("_"):
            raise AttributeError(name)
        placeholder = type(name, (), {})
        # Store the class in the module namespace: later lookups bypass
        # __getattr__ and return this same object instead of a fresh class.
        setattr(self, name, placeholder)
        return placeholder
|
||||||
|
|
||||||
|
|
||||||
|
def _inject(name):
    """Register an empty stub package under ``name`` in ``sys.modules``.

    Does nothing when a module with that name is already registered. The stub
    is marked as a package with an empty ``__path__`` and carries a real
    ``ModuleSpec``: ``importlib.util.find_spec(name)`` raises ``ValueError``
    for a module in ``sys.modules`` whose ``__spec__`` is ``None``.
    """
    if name in sys.modules:
        return

    # Imported locally so the hook's module-level imports stay unchanged.
    from importlib.machinery import ModuleSpec

    stub = _Stub(name)
    stub.__path__ = []
    stub.__package__ = name
    # loader=None with is_package=True gives an empty submodule search path,
    # matching the empty __path__ above.
    stub.__spec__ = ModuleSpec(name, None, is_package=True)
    sys.modules[name] = stub
|
||||||
|
|
||||||
|
|
||||||
|
# Importing paddle.utils.cpp_extension drags in the Cython compiler, which
# inference never needs; register stubs for the package and its submodules.
for _p in (
    "paddle.utils.cpp_extension",
    "paddle.utils.cpp_extension.cpp_extension",
    "paddle.utils.cpp_extension.extension_utils",
    "paddle.utils.cpp_extension.jit_compile",
):
    _inject(_p)
|
||||||
@@ -11,10 +11,43 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import shutil
|
||||||
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
def _tree_sizes(root: Path) -> dict:
    """Map every file under ``root`` (POSIX-style relative path) to its size in bytes."""
    return {
        entry.relative_to(root).as_posix(): entry.stat().st_size
        for entry in root.rglob("*")
        if entry.is_file()
    }


def _ascii_cache_base(log: logging.Logger) -> Path:
    """Return a writable ``post_ocr_models`` directory, preferring a pure-ASCII path.

    The system temp directory is tried first. It often lives under the user
    profile, so it can itself contain non-ASCII characters; in that case the
    ``PROGRAMDATA`` and ``PUBLIC`` locations are tried when those environment
    variables are set. If no candidate is both ASCII and creatable, the temp
    directory is returned anyway and a warning is logged.
    """
    candidates = [tempfile.gettempdir()]
    candidates.extend(
        os.environ[var] for var in ("PROGRAMDATA", "PUBLIC") if os.environ.get(var)
    )
    for parent in candidates:
        base = Path(parent) / "post_ocr_models"
        if not str(base).isascii():
            continue
        try:
            base.mkdir(parents=True, exist_ok=True)
        except OSError:
            continue
        return base

    fallback = Path(tempfile.gettempdir()) / "post_ocr_models"
    log.warning("未找到纯ASCII的可写缓存目录,回退到: %s", fallback)
    return fallback


def _ascii_cache_name(model_dir: Path) -> str:
    """Return an ASCII directory name under which ``model_dir`` is cached."""
    name = model_dir.name
    if name and name.isascii():
        return name
    # The leaf name itself is non-ASCII (or empty): derive a stable ASCII
    # name from the full source path instead.
    import hashlib

    digest = hashlib.sha1(str(model_dir).encode("utf-8")).hexdigest()[:12]
    return "model_" + digest


def _ensure_ascii_model_dir(model_dir: Path) -> str:
    """Return a path to the model directory that contains only ASCII characters.

    According to this module's notes, the PaddlePaddle C++ inference engine
    cannot open non-ASCII paths on Windows. A pure-ASCII ``model_dir`` is
    returned unchanged. Otherwise the directory is mirrored into an ASCII
    cache location and the path of the mirror is returned.

    An existing mirror is reused only when it holds exactly the same files
    with the same sizes as the source, so a partial copy or a changed
    parameters file triggers a fresh copy.

    Args:
        model_dir: Directory holding the inference model files.

    Returns:
        ``str(model_dir)`` when it is ASCII, otherwise the mirror's path.

    Raises:
        OSError: If the source cannot be read or the mirror cannot be written.
    """
    source = str(model_dir)
    if source.isascii():
        return source

    log = logging.getLogger("post_ocr.ocr")
    safe_dir = _ascii_cache_base(log) / _ascii_cache_name(model_dir)

    if safe_dir.is_dir() and _tree_sizes(safe_dir) == _tree_sizes(model_dir):
        return str(safe_dir)

    log.info("模型路径含非ASCII字符,复制到: %s", safe_dir)

    # Copy into a sibling staging directory and rename it into place, so an
    # interrupted copy never leaves a half-filled directory at the final path.
    staging = safe_dir.with_name("{}.tmp{}".format(safe_dir.name, os.getpid()))
    if staging.exists():
        shutil.rmtree(staging)
    try:
        shutil.copytree(model_dir, staging)
        if safe_dir.exists():
            shutil.rmtree(safe_dir)
        os.replace(staging, safe_dir)
    finally:
        # After a successful rename the staging path no longer exists; this
        # only removes leftovers from a failed copy.
        if staging.exists():
            shutil.rmtree(staging, ignore_errors=True)
    return str(safe_dir)
|
||||||
|
|
||||||
|
|
||||||
def _is_frozen() -> bool:
    """Report whether the process runs from a PyInstaller-built bundle.

    Reads the ``frozen`` attribute of ``sys``; a missing attribute counts
    as not frozen.
    """
    frozen_flag = getattr(sys, "frozen", False)
    return True if frozen_flag else False
|
||||||
@@ -94,8 +127,9 @@ def create_offline_ocr(models_base_dir: Path | None = None):
|
|||||||
|
|
||||||
if (det_dir / "inference.pdmodel").exists() and (rec_dir / "inference.pdmodel").exists():
|
if (det_dir / "inference.pdmodel").exists() and (rec_dir / "inference.pdmodel").exists():
|
||||||
log.info("使用离线模型: %s", models_dir)
|
log.info("使用离线模型: %s", models_dir)
|
||||||
kwargs["det_model_dir"] = str(det_dir)
|
kwargs["det_model_dir"] = _ensure_ascii_model_dir(det_dir)
|
||||||
kwargs["rec_model_dir"] = str(rec_dir)
|
kwargs["rec_model_dir"] = _ensure_ascii_model_dir(rec_dir)
|
||||||
|
log.info("det_model_dir=%s, rec_model_dir=%s", kwargs["det_model_dir"], kwargs["rec_model_dir"])
|
||||||
else:
|
else:
|
||||||
log.info("未找到离线模型,将使用默认路径(可能需要联网下载)")
|
log.info("未找到离线模型,将使用默认路径(可能需要联网下载)")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user