From e2cb60884502292afa8897cda6a12285757d2d4c Mon Sep 17 00:00:00 2001
From: root <root@localhost.localdomain>
Date: Thu, 12 Feb 2026 06:55:59 +0000
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20PaddleOCR=20API=20?=
 =?UTF-8?q?=E5=85=BC=E5=AE=B9=E6=80=A7=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

由于 PaddleOCR 3.x 的 predict() 方法存在 PIR (Paddle IR)
兼容性问题，会导致 OneDNN 指令执行失败，因此改用 2.x 版本的
ocr() 方法。

主要变更：
- 将 ocr.predict(img_path) 改为 ocr.ocr(img_path, cls=False)
- 适配 2.x 版本的返回格式：result[0] 中每一行为 [box, (text, confidence)]
- 移除 PaddleX OCRResult 结构的适配代码

测试环境：
- paddleocr==2.10.0
- paddlepaddle==2.6.2

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/main.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/main.py b/src/main.py
index 283b4c5..184dfb0 100644
--- a/src/main.py
+++ b/src/main.py
@@ -35,18 +35,16 @@ def main():
 
     for img_path in tqdm(image_paths):
         try:
-            # 1. 执行 OCR 识别 (使用 predict 替代 deprecated 的 ocr 方法)
-            result = ocr.predict(img_path)
+            # 1. 执行 OCR 识别
+            result = ocr.ocr(img_path, cls=False)
 
-            # 2. 提取文字行 (适配 Paddlex OCRResult 结构)
+            # 2. 提取文字行
             ocr_texts = []
-            if result:
-                for res in result:
-                    # 获取识别出的文本列表
-                    if hasattr(res, "rec_texts"):
-                        ocr_texts.extend(res.rec_texts)
-                    elif isinstance(res, dict) and "rec_texts" in res:
-                        ocr_texts.extend(res["rec_texts"])
+            if result and result[0]:
+                for line in result[0]:
+                    # line 格式: [box, (text, confidence)]
+                    if line and len(line) >= 2:
+                        ocr_texts.append(line[1][0])
 
             # 3. 结构化解析
             if ocr_texts: