feat: 实现CutThenThink P0阶段核心功能
项目初始化 - 创建完整项目结构(src/, data/, docs/, examples/, tests/) - 配置requirements.txt依赖 - 创建.gitignore P0基础框架 - 数据库模型:Record模型,6种分类类型 - 配置管理:YAML配置,支持AI/OCR/云存储/UI配置 - OCR模块:PaddleOCR本地识别,支持云端扩展 - AI模块:支持OpenAI/Claude/通义/Ollama,6种分类 - 存储模块:完整CRUD,搜索,统计,导入导出 - 主窗口框架:侧边栏导航,米白配色方案 - 图片处理:截图/剪贴板/文件选择/图片预览 - 处理流程整合:OCR→AI→存储串联,Markdown展示,剪贴板复制 - 分类浏览:卡片网格展示,分类筛选,搜索,详情查看 技术栈 - PyQt6 + SQLAlchemy + PaddleOCR + OpenAI/Claude SDK - 共47个Python文件,4000+行代码 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
286
examples/ocr_example.py
Normal file
286
examples/ocr_example.py
Normal file
@@ -0,0 +1,286 @@
|
||||
"""
|
||||
OCR 模块使用示例
|
||||
|
||||
演示如何使用 OCR 模块进行文字识别
|
||||
"""
|
||||
|
||||
# 导入 OCR 模块
|
||||
from src.core.ocr import (
|
||||
recognize_text,
|
||||
preprocess_image,
|
||||
PaddleOCREngine,
|
||||
CloudOCREngine,
|
||||
OCRFactory,
|
||||
ImagePreprocessor,
|
||||
OCRResult,
|
||||
OCRLanguage
|
||||
)
|
||||
|
||||
|
||||
def example_1_quick_recognize():
    """Example 1: recognize text with a single call (the simplest usage)."""
    print("示例 1: 快速识别文本")
    print("-" * 50)

    # Keyword options forwarded to recognize_text.
    options = {
        "mode": "local",      # local recognition
        "lang": "ch",         # Chinese
        "use_gpu": False,     # do not use the GPU
        "preprocess": False,  # skip preprocessing
    }
    result = recognize_text(image="path/to/your/image.png", **options)

    # Guard clause: report the failure and stop.
    if not result.success:
        print(f"识别失败: {result.error_message}")
        return

    print("识别成功!")
    print(f"平均置信度: {result.total_confidence:.2f}")
    print(f"识别行数: {len(result.results)}")
    print(f"完整文本:\n{result.full_text}")
|
||||
|
||||
|
||||
def example_2_with_preprocess():
    """Example 2: recognition with preprocessing enabled (for low-quality images).

    Calls recognize_text with ``preprocess=True`` and prints the recognized
    text, or the error message when recognition fails.
    """
    print("\n示例 2: 带预处理的识别")
    print("-" * 50)

    result = recognize_text(
        image="path/to/your/image.png",
        mode="local",
        lang="ch",
        preprocess=True,  # enable preprocessing (contrast, sharpness, etc.)
    )

    if result.success:
        print("识别成功!")
        print(f"完整文本:\n{result.full_text}")
    else:
        # Report the failure instead of exiting silently, consistent with
        # the other examples in this file.
        print(f"识别失败: {result.error_message}")
|
||||
|
||||
|
||||
def example_3_engine_directly():
    """Example 3: use the OCR engine class directly.

    Builds a PaddleOCREngine from a config dict, recognizes one image and
    prints the full text followed by each recognized line, or the error
    message when recognition fails.
    """
    print("\n示例 3: 直接使用 OCR 引擎")
    print("-" * 50)

    # Create the engine.
    config = {
        'lang': 'ch',       # language
        'use_gpu': False,   # whether to use the GPU
        'show_log': False,  # whether to show logs
    }
    engine = PaddleOCREngine(config)

    # Recognize the image.
    result = engine.recognize(
        image="path/to/your/image.png",
        preprocess=False,
    )

    if not result.success:
        # Report the failure instead of exiting silently, consistent with
        # the other examples in this file.
        print(f"识别失败: {result.error_message}")
        return

    print("识别成功!")
    print(f"完整文本:\n{result.full_text}")

    # Walk through every recognized line.
    for line_result in result.results:
        print(f"行 {line_result.line_index}: {line_result.text} (置信度: {line_result.confidence:.2f})")
|
||||
|
||||
|
||||
def example_4_batch_images():
    """Example 4: process several images with one engine instance."""
    print("\n示例 4: 批量处理多张图片")
    print("-" * 50)

    paths = (
        "path/to/image1.png",
        "path/to/image2.png",
        "path/to/image3.png",
    )

    # One engine is created up front and reused for every image.
    engine = PaddleOCREngine({'lang': 'ch'})

    for index, path in enumerate(paths, start=1):
        print(f"\n处理图片 {index}: {path}")
        outcome = engine.recognize(path)

        if not outcome.success:
            print(f" 识别失败: {outcome.error_message}")
            continue

        print(f" 识别成功,置信度: {outcome.total_confidence:.2f}")
        # Preview at most the first 100 characters of the text.
        print(f" 文本预览: {outcome.full_text[:100]}...")
|
||||
|
||||
|
||||
def example_5_image_preprocess():
    """Example 5: preprocess an image, then run OCR on the processed result.

    Calls preprocess_image with explicit enhancement switches, prints the
    size of the returned image, then passes that image to recognize_text
    and prints the recognized text or the error message.
    """
    print("\n示例 5: 图像预处理")
    print("-" * 50)

    # Preprocess and save.
    processed = preprocess_image(
        image_path="path/to/input.png",
        output_path="path/to/output_processed.png",
        resize=True,             # resize
        enhance_contrast=True,   # enhance contrast
        enhance_sharpness=True,  # enhance sharpness
        denoise=False,           # no denoising
        binarize=False,          # no binarization
    )

    print(f"预处理完成,图像尺寸: {processed.size}")

    # Run OCR on the preprocessed image; the returned image object is
    # passed straight to recognize_text.
    result = recognize_text(
        image=processed,
        mode="local",
        lang="ch",
    )

    if result.success:
        print(f"识别文本: {result.full_text}")
    else:
        # Report the failure instead of exiting silently, consistent with
        # the other examples in this file.
        print(f"识别失败: {result.error_message}")
|
||||
|
||||
|
||||
def example_6_multilanguage():
    """Example 6: recognize images in different languages."""
    print("\n示例 6: 多语言识别")
    print("-" * 50)

    # (output label, image path, language code) for each run, in order:
    # Chinese, English, mixed Chinese/English.
    # NOTE(review): "chinese_chinese" does not look like a standard
    # PaddleOCR language code — confirm the OCR module accepts it.
    runs = (
        ("中文识别置信度", "path/to/chinese_image.png", "ch"),
        ("英文识别置信度", "path/to/english_image.png", "en"),
        ("混合识别置信度", "path/to/mixed_image.png", "chinese_chinese"),
    )

    for label, image_path, lang_code in runs:
        outcome = recognize_text(image=image_path, lang=lang_code)
        print(f"{label}: {outcome.total_confidence:.2f}")
|
||||
|
||||
|
||||
def example_7_cloud_ocr():
    """Example 7: cloud OCR (requires configuration)."""
    print("\n示例 7: 云端 OCR")
    print("-" * 50)

    # Cloud OCR configuration.
    engine = CloudOCREngine(
        {
            'api_endpoint': 'https://api.example.com/ocr',
            'api_key': 'your_api_key_here',
            'provider': 'custom',
            'timeout': 30,
        }
    )

    # Note: cloud OCR needs a _send_request implementation for the
    # specific API being used.
    result = engine.recognize("path/to/image.png")

    message = (
        f"识别成功: {result.full_text}"
        if result.success
        else f"云端 OCR 尚未实现: {result.error_message}"
    )
    print(message)
|
||||
|
||||
|
||||
def example_8_factory_pattern():
    """Example 8: create engines through the OCRFactory."""
    print("\n示例 8: 使用工厂模式创建引擎")
    print("-" * 50)

    # (output label, factory mode, engine config): local engine first,
    # then the cloud engine.
    specs = (
        ("本地引擎类型", "local", {'lang': 'ch'}),
        ("云端引擎类型", "cloud", {'api_endpoint': 'https://api.example.com/ocr'}),
    )

    for label, mode, engine_config in specs:
        engine = OCRFactory.create_engine(mode=mode, config=engine_config)
        print(f"{label}: {type(engine).__name__}")
|
||||
|
||||
|
||||
def example_9_detailed_result():
    """Example 9: inspect the detailed recognition result.

    Prints every recognized line (text, confidence and, when present, its
    bounding box), followed by summary statistics. When recognition
    succeeds with zero lines, the average confidence is reported as 0.0
    rather than raising ZeroDivisionError.
    """
    print("\n示例 9: 处理详细识别结果")
    print("-" * 50)

    result = recognize_text(
        image="path/to/image.png",
        mode="local",
        lang="ch",
    )

    if not result.success:
        # Report the failure instead of exiting silently, consistent with
        # the other examples in this file.
        print(f"识别失败: {result.error_message}")
        return

    # Walk through every recognized line.
    for line_result in result.results:
        print(f"\n行 {line_result.line_index}:")
        print(f" 文本: {line_result.text}")
        print(f" 置信度: {line_result.confidence:.2f}")

        # Only print coordinates when they are available.
        if line_result.bbox:
            print(f" 坐标: {line_result.bbox}")

    # Summary statistics.
    line_count = len(result.results)
    total_chars = sum(len(r.text) for r in result.results)
    # Guard the division: a successful run may still yield zero lines.
    if line_count:
        avg_confidence = sum(r.confidence for r in result.results) / line_count
    else:
        avg_confidence = 0.0

    print(f"\n统计:")
    print(f" 总行数: {line_count}")
    print(f" 总字符数: {total_chars}")
    print(f" 平均置信度: {avg_confidence:.2f}")
|
||||
|
||||
|
||||
def example_10_pil_image_input():
    """Example 10: pass a PIL Image object as the OCR input.

    Opens an image, crops a region of interest and hands the cropped
    image to recognize_text. The source image is opened in a ``with``
    block so its file handle is released when the example finishes.
    """
    print("\n示例 10: 使用 PIL Image 作为输入")
    print("-" * 50)

    from PIL import Image

    # Load the image; the context manager closes the underlying file.
    with Image.open("path/to/image.png") as pil_image:
        # Crop the region of interest (left, upper, right, lower).
        cropped = pil_image.crop((100, 100, 500, 300))

        # Recognize the PIL Image directly, while the source is still open.
        result = recognize_text(
            image=cropped,
            mode="local",
            lang="ch",
        )

    if result.success:
        print(f"识别结果: {result.full_text}")
    else:
        # Report the failure instead of exiting silently, consistent with
        # the other examples in this file.
        print(f"识别失败: {result.error_message}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Print the usage banner; the examples themselves stay opt-in.
    separator = "=" * 50
    banner_lines = (
        "OCR 模块使用示例",
        separator,
        "\n注意:运行这些示例前,请确保:",
        "1. 安装依赖: pip install paddleocr paddlepaddle",
        "2. 将示例中的 'path/to/image.png' 替换为实际图片路径",
        separator,
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Uncomment the examples you want to run.
    # example_1_quick_recognize()
    # example_2_with_preprocess()
    # example_3_engine_directly()
    # example_4_batch_images()
    # example_5_image_preprocess()
    # example_6_multilanguage()
    # example_7_cloud_ocr()
    # example_8_factory_pattern()
    # example_9_detailed_result()
    # example_10_pil_image_input()
|
||||
Reference in New Issue
Block a user