import os
import tempfile
import base64

import pandas as pd
import streamlit as st
import streamlit.components.v1 as components

# Set the model-source-check flag BEFORE importing paddleocr. Previously it was
# assigned after the import; a flag that the library reads while it is being
# imported would already have been evaluated by then and the assignment would
# have no effect.
# NOTE(review): presumably the PADDLE_PDX_* flags are read at import time —
# confirm against the installed PaddleOCR/PaddleX version.
os.environ["PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK"] = "True"

from paddleocr import PaddleOCR  # noqa: E402  (must follow the env assignment)
from processor import extract_info, save_to_excel  # noqa: E402
# Page-level settings: centered layout, sidebar collapsed on first load.
st.set_page_config(
    layout="centered",
    initial_sidebar_state="collapsed",
    page_title="信封信息提取系统",
    page_icon="📮",
)

# Raw-HTML injection point. The payload is currently just a newline, so this
# call adds no visible markup.
st.markdown("\n", unsafe_allow_html=True)

# Main page heading.
st.title("📮 信封信息提取")
@st.cache_resource
def load_ocr():
    """Create the PaddleOCR engine used by the whole app.

    The function is wrapped in ``st.cache_resource`` and takes no arguments,
    so the engine is built through Streamlit's resource cache rather than on
    every call.

    Returns:
        A ``PaddleOCR`` instance configured for Chinese (``lang="ch"``) with
        text-line orientation enabled and logging switched off.
    """
    # NOTE(review): `use_textline_orientation` and `show_log` look like they
    # come from different PaddleOCR major versions — confirm the installed
    # release accepts both keyword arguments.
    engine_options = {
        "use_textline_orientation": True,
        "lang": "ch",
        "show_log": False,
    }
    return PaddleOCR(**engine_options)


# Module-level engine handle used by process_image().
ocr = load_ocr()
def process_image(image_data):
    """Run OCR on encoded image bytes and extract the envelope fields.

    The bytes are written to a temporary ``.jpg`` file because the OCR engine
    is called with a file path. The file is always removed afterwards, even
    when writing the bytes or running OCR fails.

    Args:
        image_data: Encoded image content as ``bytes``.

    Returns:
        A ``(record, ocr_texts)`` tuple, where ``ocr_texts`` is the list of
        recognised text strings and ``record`` is whatever
        ``extract_info(ocr_texts)`` returns (callers treat it as a mapping).
    """
    # delete=False: the file is closed (and therefore flushed) before the OCR
    # engine opens it by path, so it has to outlive the `with` block.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
    tmp_path = tmp.name
    try:
        # The write sits inside the try so a failed write cannot leak the file.
        with tmp:
            tmp.write(image_data)

        result = ocr.ocr(tmp_path, cls=False)

        # Only the first page of the result is read; each usable entry has at
        # least two elements and entry[1][0] is the recognised text.
        first_page = result[0] if result else None
        ocr_texts = [
            entry[1][0]
            for entry in (first_page or [])
            if entry and len(entry) >= 2
        ]
        return extract_info(ocr_texts), ocr_texts
    finally:
        os.unlink(tmp_path)
# Custom camera component with an overlaid scan frame.
# This string is passed unchanged to components.html() in the camera tab, so
# its content is runtime text and must not be edited casually. As it stands,
# the literal holds only the two caption lines shown below.
CAMERA_COMPONENT = """
↑ 编号在此处 ↑
📌 将信封背面对齐绿色框,编号对准底部
"""
# Initialise session state: make sure the list of collected records exists
# before any tab tries to append to it.
if "records" not in st.session_state:
    st.session_state["records"] = []
# Input method selection: one tab per way of supplying an envelope image.
tab_camera, tab_upload = st.tabs(["📷 拍照扫描", "📁 上传图片"])

with tab_camera:
    # Render the custom camera markup.
    # NOTE(review): components.html() only displays static HTML; its return
    # value is not image data sent back from the browser, so `photo_data` is
    # not used below.
    photo_data = components.html(CAMERA_COMPONENT, height=550)

    # Slot for a captured frame; nothing in this section writes to it.
    if "captured_image" not in st.session_state:
        st.session_state.captured_image = None

    # The uploader is the path that actually delivers an image to Python.
    uploaded_photo = st.file_uploader(
        "或直接上传照片",
        type=["jpg", "jpeg", "png"],
        key="camera_upload",
        label_visibility="collapsed",
    )

    if uploaded_photo:
        with st.spinner("识别中..."):
            record, raw_texts = process_image(uploaded_photo.getvalue())
        st.success("✅ 识别完成!")

        # Preview on the left, extracted key fields on the right.
        col1, col2 = st.columns(2)
        with col1:
            st.image(uploaded_photo, caption="拍摄图片", use_container_width=True)
        with col2:
            st.metric("邮编", record.get("邮编", "-"))
            st.metric("电话", record.get("电话", "-"))
            st.metric("联系人", record.get("联系人/单位名", "-"))

        # NOTE(review): the source had lost its indentation, so whether these
        # two read-only fields belong inside the right-hand column could not
        # be recovered — confirm the intended placement.
        st.text_area("地址", record.get("地址", ""), disabled=True, height=68)
        st.text_input("编号", record.get("编号", ""), disabled=True)

        if st.button("✅ 添加到列表", type="primary", key="add_camera"):
            record["来源"] = "拍照"
            st.session_state.records.append(record)
            # No st.rerun() here: the records table is rendered further down
            # in this same run, so it already reflects the new entry. An
            # immediate rerun would discard the confirmation below and force
            # another OCR pass on the still-selected photo.
            st.success(f"已添加!当前共 {len(st.session_state.records)} 条记录")
with tab_upload:
    # Batch mode: several image files can be selected and processed at once.
    uploaded_files = st.file_uploader(
        "选择图片文件",
        type=["jpg", "jpeg", "png", "bmp"],
        accept_multiple_files=True,
        label_visibility="collapsed",
    )

    if uploaded_files:
        if st.button("🚀 开始识别", type="primary"):
            progress = st.progress(0)
            total = len(uploaded_files)
            for done, upload in enumerate(uploaded_files, start=1):
                with st.spinner(f"处理 {upload.name}..."):
                    record, _ = process_image(upload.getvalue())
                    # Tag each record with the file it came from.
                    record["来源"] = upload.name
                    st.session_state.records.append(record)
                progress.progress(done / total)
            # No st.rerun() here: the records table is rendered further down
            # in this same run, so it already includes the new entries. An
            # immediate rerun would wipe this confirmation and the completed
            # progress bar before the user could read them.
            st.success(f"完成!已添加 {len(uploaded_files)} 条记录")
# Show the records collected so far, with export and reset actions.
st.divider()
st.subheader(f"📋 已收集 {len(st.session_state.records)} 条记录")

if st.session_state.records:
    df = pd.DataFrame(st.session_state.records)
    # Fixed display/export column order; columns missing from the data are
    # skipped rather than created empty.
    cols = ["来源", "编号", "邮编", "地址", "联系人/单位名", "电话"]
    df = df.reindex(columns=[c for c in cols if c in df.columns])
    st.dataframe(df, use_container_width=True, hide_index=True)

    col1, col2 = st.columns(2)
    with col1:
        # Build the workbook inside a private temporary directory instead of
        # tempfile.mktemp(), which is deprecated and race-prone. The directory
        # and the file in it are removed even if the export raises.
        with tempfile.TemporaryDirectory() as export_dir:
            output_path = os.path.join(export_dir, "records.xlsx")
            df.to_excel(output_path, index=False)
            with open(output_path, "rb") as f:
                excel_bytes = f.read()

        st.download_button(
            "📥 下载 Excel",
            data=excel_bytes,
            file_name="信封提取结果.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
    with col2:
        if st.button("🗑️ 清空列表"):
            st.session_state.records = []
            # The table above was already drawn from the old list, so a rerun
            # is needed here to show the emptied state.
            st.rerun()
else:
    st.info("👆 使用上方拍照或上传功能添加记录")