Files
cutThenThink/examples/ocr_example.py
congsh c4a77f8aa4 feat: 实现CutThenThink P0阶段核心功能
项目初始化
- 创建完整项目结构(src/, data/, docs/, examples/, tests/)
- 配置requirements.txt依赖
- 创建.gitignore

P0基础框架
- 数据库模型:Record模型,6种分类类型
- 配置管理:YAML配置,支持AI/OCR/云存储/UI配置
- OCR模块:PaddleOCR本地识别,支持云端扩展
- AI模块:支持OpenAI/Claude/通义/Ollama,6种分类
- 存储模块:完整CRUD,搜索,统计,导入导出
- 主窗口框架:侧边栏导航,米白配色方案
- 图片处理:截图/剪贴板/文件选择/图片预览
- 处理流程整合:OCR→AI→存储串联,Markdown展示,剪贴板复制
- 分类浏览:卡片网格展示,分类筛选,搜索,详情查看

技术栈
- PyQt6 + SQLAlchemy + PaddleOCR + OpenAI/Claude SDK
- 共47个Python文件,4000+行代码

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 18:21:31 +08:00

287 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
OCR 模块使用示例
演示如何使用 OCR 模块进行文字识别
"""
# 导入 OCR 模块
from src.core.ocr import (
recognize_text,
preprocess_image,
PaddleOCREngine,
CloudOCREngine,
OCRFactory,
ImagePreprocessor,
OCRResult,
OCRLanguage
)
def example_1_quick_recognize():
    """Example 1: quick text recognition (the simplest entry point).

    Calls ``recognize_text`` once with local mode, Chinese language, GPU off
    and preprocessing off. On success prints the mean confidence, the number
    of result lines and the full text; otherwise prints the error message.
    """
    print("示例 1: 快速识别文本")
    print("-" * 50)

    options = {
        "mode": "local",      # local recognition
        "lang": "ch",         # Chinese
        "use_gpu": False,     # do not use the GPU
        "preprocess": False,  # skip preprocessing
    }
    outcome = recognize_text(image="path/to/your/image.png", **options)

    # Report the failure first so the success path reads straight through.
    if not outcome.success:
        print(f"识别失败: {outcome.error_message}")
        return

    print("识别成功!")
    print(f"平均置信度: {outcome.total_confidence:.2f}")
    print(f"识别行数: {len(outcome.results)}")
    print(f"完整文本:\n{outcome.full_text}")
def example_2_with_preprocess():
    """Example 2: recognition with preprocessing switched on.

    Same helper as the quick example but with ``preprocess=True``; the
    original author's note describes this as contrast/sharpness enhancement
    aimed at low-quality images. Prints the full text on success and prints
    nothing further on failure.
    """
    print("\n示例 2: 带预处理的识别")
    print("-" * 50)

    outcome = recognize_text(
        image="path/to/your/image.png",
        mode="local",
        lang="ch",
        preprocess=True,  # enable preprocessing
    )
    if not outcome.success:
        return

    print("识别成功!")
    print(f"完整文本:\n{outcome.full_text}")
def example_3_engine_directly():
    """Example 3: use a ``PaddleOCREngine`` instance directly.

    Builds the engine from a config mapping, recognises one image without
    preprocessing and, on success, prints the full text followed by one line
    per result entry (index, text, confidence).
    """
    print("\n示例 3: 直接使用 OCR 引擎")
    print("-" * 50)

    # Engine configuration: language, GPU switch, log switch.
    engine_options = dict(lang='ch', use_gpu=False, show_log=False)
    ocr = PaddleOCREngine(engine_options)

    # Recognise the image.
    outcome = ocr.recognize(image="path/to/your/image.png", preprocess=False)
    if not outcome.success:
        return

    print("识别成功!")
    print(f"完整文本:\n{outcome.full_text}")

    # Walk every recognised line.
    for row in outcome.results:
        print(f"行 {row.line_index}: {row.text} (置信度: {row.confidence:.2f})")
def example_4_batch_images():
    """Example 4: process several images with one engine.

    Creates a single ``PaddleOCREngine`` and feeds it each path in turn,
    printing the confidence and the first 100 characters of text on success
    or the error message on failure.
    """
    print("\n示例 4: 批量处理多张图片")
    print("-" * 50)

    image_list = [
        "path/to/image1.png",
        "path/to/image2.png",
        "path/to/image3.png",
    ]
    ocr = PaddleOCREngine({'lang': 'ch'})

    # Numbering in the output starts at 1.
    for position, path in enumerate(image_list, start=1):
        print(f"\n处理图片 {position}: {path}")
        outcome = ocr.recognize(path)

        if not outcome.success:
            print(f" 识别失败: {outcome.error_message}")
            continue

        print(f" 识别成功,置信度: {outcome.total_confidence:.2f}")
        # The ellipsis is appended unconditionally, even for short text.
        print(f" 文本预览: {outcome.full_text[:100]}...")
def example_5_image_preprocess():
    """Example 5: preprocess an image, then recognise the processed result.

    Calls ``preprocess_image`` with an input and an output path plus a set of
    enhancement switches, prints the ``size`` of what it returns, and passes
    that same object to ``recognize_text`` (the original note says a PIL
    Image is accepted there). Prints the recognised text on success.
    """
    print("\n示例 5: 图像预处理")
    print("-" * 50)

    # Which preprocessing steps to apply.
    steps = dict(
        resize=True,             # resize
        enhance_contrast=True,   # boost contrast
        enhance_sharpness=True,  # boost sharpness
        denoise=False,           # no denoising
        binarize=False,          # no binarisation
    )

    # Preprocess and save.
    enhanced = preprocess_image(
        image_path="path/to/input.png",
        output_path="path/to/output_processed.png",
        **steps,
    )
    print(f"预处理完成,图像尺寸: {enhanced.size}")

    # Then run OCR on the preprocessed image.
    outcome = recognize_text(image=enhanced, mode="local", lang="ch")
    if outcome.success:
        print(f"识别文本: {outcome.full_text}")
def example_6_multilanguage():
    """Example 6: recognise images using different ``lang`` settings.

    Runs ``recognize_text`` three times (Chinese, English, mixed) and prints
    the ``total_confidence`` of each result. ``success`` is not checked here.
    """
    print("\n示例 6: 多语言识别")
    print("-" * 50)

    # (label used in the printed line, image path, lang argument)
    cases = (
        ("中文", "path/to/chinese_image.png", "ch"),  # Chinese
        ("英文", "path/to/english_image.png", "en"),  # English
        # NOTE(review): "chinese_chinese" is the value the example passes for
        # mixed Chinese/English text; confirm it is a language code that
        # src.core.ocr actually accepts.
        ("混合", "path/to/mixed_image.png", "chinese_chinese"),
    )

    for label, image_path, lang_code in cases:
        outcome = recognize_text(image=image_path, lang=lang_code)
        print(f"{label}识别置信度: {outcome.total_confidence:.2f}")
def example_7_cloud_ocr():
    """Example 7: cloud OCR (requires configuration).

    Builds a ``CloudOCREngine`` from placeholder endpoint/key settings and
    recognises one image, printing the text on success or a "not implemented
    yet" line carrying the error message otherwise.
    """
    print("\n示例 7: 云端 OCR")
    print("-" * 50)

    # Cloud OCR settings (placeholders, replace with real values).
    cloud_settings = dict(
        api_endpoint='https://api.example.com/ocr',
        api_key='your_api_key_here',
        provider='custom',
        timeout=30,
    )
    cloud = CloudOCREngine(cloud_settings)

    # Note from the original author: cloud OCR needs a _send_request
    # implementation matching the concrete API.
    outcome = cloud.recognize("path/to/image.png")

    if not outcome.success:
        print(f"云端 OCR 尚未实现: {outcome.error_message}")
        return
    print(f"识别成功: {outcome.full_text}")
def example_8_factory_pattern():
    """Example 8: create engines through ``OCRFactory``.

    Asks the factory for a local engine and then a cloud engine, printing the
    class name of each object it returns.
    """
    print("\n示例 8: 使用工厂模式创建引擎")
    print("-" * 50)

    # (label used in the printed line, factory mode, engine config)
    requests = (
        ("本地", "local", {'lang': 'ch'}),
        ("云端", "cloud", {'api_endpoint': 'https://api.example.com/ocr'}),
    )

    for label, engine_mode, engine_config in requests:
        built = OCRFactory.create_engine(mode=engine_mode, config=engine_config)
        print(f"{label}引擎类型: {type(built).__name__}")
def example_9_detailed_result():
    """Example 9: inspect the per-line results and print summary statistics.

    For every result line prints its index, text, confidence and, when set,
    its bounding box. Afterwards prints the line count, total character count
    and mean confidence.

    A successful result that carries no lines reports a mean confidence of
    0.00 instead of raising ``ZeroDivisionError``. A failed result prints the
    error message, matching the quick-recognition example.
    """
    print("\n示例 9: 处理详细识别结果")
    print("-" * 50)

    result = recognize_text(
        image="path/to/image.png",
        mode="local",
        lang="ch",
    )
    if not result.success:
        print(f"识别失败: {result.error_message}")
        return

    lines = result.results

    # Walk every recognised line.
    for line_result in lines:
        print(f"\n行 {line_result.line_index}:")
        print(f" 文本: {line_result.text}")
        print(f" 置信度: {line_result.confidence:.2f}")
        # Coordinates are optional.
        if line_result.bbox:
            print(f" 坐标: {line_result.bbox}")

    # Summary statistics.
    line_count = len(lines)
    total_chars = sum(len(r.text) for r in lines)
    # Guard the mean: success with zero lines (presumably an image without
    # text) must not divide by zero.
    if line_count:
        avg_confidence = sum(r.confidence for r in lines) / line_count
    else:
        avg_confidence = 0.0

    print("\n统计:")
    print(f" 总行数: {line_count}")
    print(f" 总字符数: {total_chars}")
    print(f" 平均置信度: {avg_confidence:.2f}")
def example_10_pil_image_input():
    """Example 10: pass a PIL Image object to ``recognize_text``.

    Opens an image with Pillow, crops a region of interest and recognises the
    cropped image. The source image is opened as a context manager so its
    file handle is released once recognition has finished, instead of being
    left open. Prints the recognised text on success.
    """
    print("\n示例 10: 使用 PIL Image 作为输入")
    print("-" * 50)

    # Imported here, as in the original, so the other examples do not need
    # Pillow to be importable.
    from PIL import Image

    # Load the image; the with-block closes the underlying file afterwards.
    with Image.open("path/to/image.png") as pil_image:
        # Crop the region of interest (left, upper, right, lower).
        cropped = pil_image.crop((100, 100, 500, 300))

        # Recognise the PIL Image directly, while the source is still open.
        result = recognize_text(
            image=cropped,
            mode="local",
            lang="ch",
        )

    if result.success:
        print(f"识别结果: {result.full_text}")
if __name__ == "__main__":
    # Banner: title, then the prerequisites framed by two horizontal rules.
    rule = "=" * 50
    for banner_line in (
        "OCR 模块使用示例",
        rule,
        "\n注意:运行这些示例前,请确保:",
        "1. 安装依赖: pip install paddleocr paddlepaddle",
        "2. 将示例中的 'path/to/image.png' 替换为实际图片路径",
        rule,
    ):
        print(banner_line)

    # Uncomment the examples you want to run.
    # example_1_quick_recognize()
    # example_2_with_preprocess()
    # example_3_engine_directly()
    # example_4_batch_images()
    # example_5_image_preprocess()
    # example_6_multilanguage()
    # example_7_cloud_ocr()
    # example_8_factory_pattern()
    # example_9_detailed_result()
    # example_10_pil_image_input()