feat: 实现CutThenThink P0阶段核心功能

项目初始化
- 创建完整项目结构（src/, data/, docs/, examples/, tests/）
- 配置requirements.txt依赖
- 创建.gitignore

P0基础框架
- 数据库模型：Record模型，6种分类类型
- 配置管理：YAML配置，支持AI/OCR/云存储/UI配置
- OCR模块：PaddleOCR本地识别，支持云端扩展
- AI模块：支持OpenAI/Claude/通义/Ollama，6种分类
- 存储模块：完整CRUD，搜索，统计，导入导出
- 主窗口框架：侧边栏导航，米白配色方案
- 图片处理：截图/剪贴板/文件选择/图片预览
- 处理流程整合：OCR→AI→存储串联，Markdown展示，剪贴板复制
- 分类浏览：卡片网格展示，分类筛选，搜索，详情查看

技术栈
- PyQt6 + SQLAlchemy + PaddleOCR + OpenAI/Claude SDK
- 共47个Python文件，4000+行代码

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-11 18:21:31 +08:00
commit c4a77f8aa4
79 changed files with 19412 additions and 0 deletions
--- a/examples/ocr_example.py
+++ b/examples/ocr_example.py
@@ -0,0 +1,286 @@
+"""
+OCR 模块使用示例
+
+演示如何使用 OCR 模块进行文字识别
+"""
+
+# 导入 OCR 模块
+from src.core.ocr import (
+    recognize_text,
+    preprocess_image,
+    PaddleOCREngine,
+    CloudOCREngine,
+    OCRFactory,
+    ImagePreprocessor,
+    OCRResult,
+    OCRLanguage
+)
+
+
+def example_1_quick_recognize():
+    """Example 1: quick text recognition (the simplest entry point)."""
+    print("示例 1: 快速识别文本")
+    print("-" * 50)
+
+    # Local recognition, Chinese, no GPU, no preprocessing.
+    options = {
+        "mode": "local",
+        "lang": "ch",
+        "use_gpu": False,
+        "preprocess": False,
+    }
+    outcome = recognize_text(image="path/to/your/image.png", **options)
+
+    # Guard clause: report the failure and stop.
+    if not outcome.success:
+        print(f"识别失败: {outcome.error_message}")
+        return
+
+    print("识别成功！")
+    print(f"平均置信度: {outcome.total_confidence:.2f}")
+    print(f"识别行数: {len(outcome.results)}")
+    print(f"完整文本:\n{outcome.full_text}")
+
+
+def example_2_with_preprocess():
+    """Example 2: recognition with preprocessing (suited to low-quality images)."""
+    print("\n示例 2: 带预处理的识别")
+    print("-" * 50)
+
+    # preprocess=True enables preprocessing (contrast/sharpness enhancement, etc.).
+    outcome = recognize_text(
+        image="path/to/your/image.png", mode="local", lang="ch", preprocess=True
+    )
+
+    # This example prints nothing when recognition fails.
+    if not outcome.success:
+        return
+
+    print("识别成功！")
+    print(f"完整文本:\n{outcome.full_text}")
+
+
+def example_3_engine_directly():
+    """Example 3: drive the OCR engine directly."""
+    print("\n示例 3: 直接使用 OCR 引擎")
+    print("-" * 50)
+
+    # Engine settings: language, GPU switch, log switch.
+    engine_settings = {'lang': 'ch', 'use_gpu': False, 'show_log': False}
+    engine = PaddleOCREngine(engine_settings)
+
+    # Recognize the image without preprocessing.
+    outcome = engine.recognize(image="path/to/your/image.png", preprocess=False)
+
+    # This example prints nothing when recognition fails.
+    if not outcome.success:
+        return
+
+    print("识别成功！")
+    print(f"完整文本:\n{outcome.full_text}")
+
+    # Walk through the per-line results.
+    for line in outcome.results:
+        print(f"行 {line.line_index}: {line.text} (置信度: {line.confidence:.2f})")
+
+
+def example_4_batch_images():
+    """Example 4: process several images in one batch."""
+    print("\n示例 4: 批量处理多张图片")
+    print("-" * 50)
+
+    paths = (
+        "path/to/image1.png",
+        "path/to/image2.png",
+        "path/to/image3.png",
+    )
+
+    # One engine instance is reused for every image.
+    engine = PaddleOCREngine({'lang': 'ch'})
+
+    for position, path in enumerate(paths, start=1):
+        print(f"\n处理图片 {position}: {path}")
+        outcome = engine.recognize(path)
+
+        if not outcome.success:
+            print(f"  识别失败: {outcome.error_message}")
+            continue
+
+        print(f"  识别成功，置信度: {outcome.total_confidence:.2f}")
+        # Only the first 100 characters are shown as a preview.
+        preview = outcome.full_text[:100]
+        print(f"  文本预览: {preview}...")
+
+
+def example_5_image_preprocess():
+    """Example 5: image preprocessing (to improve recognition quality)."""
+    print("\n示例 5: 图像预处理")
+    print("-" * 50)
+
+    # Switches: resize, enhance contrast and sharpness; no denoising, no binarization.
+    enhance_options = {
+        "resize": True,
+        "enhance_contrast": True,
+        "enhance_sharpness": True,
+        "denoise": False,
+        "binarize": False,
+    }
+
+    # Preprocess and save.
+    processed = preprocess_image(
+        image_path="path/to/input.png",
+        output_path="path/to/output_processed.png",
+        **enhance_options,
+    )
+    print(f"预处理完成，图像尺寸: {processed.size}")
+
+    # Run OCR on the preprocessed image; a PIL Image can be passed in directly.
+    outcome = recognize_text(image=processed, mode="local", lang="ch")
+
+    if outcome.success:
+        print(f"识别文本: {outcome.full_text}")
+
+
+def example_6_multilanguage():
+    """Example 6: recognition in several languages."""
+    print("\n示例 6: 多语言识别")
+    print("-" * 50)
+
+    # (image path, language code, printed label), handled in this order.
+    # NOTE(review): "chinese_chinese" does not look like a language code that
+    # PaddleOCR documents (its "ch" model presumably already covers mixed
+    # Chinese/English text); confirm that src.core.ocr accepts or maps it.
+    cases = (
+        ("path/to/chinese_image.png", "ch", "中文识别置信度"),  # Chinese
+        ("path/to/english_image.png", "en", "英文识别置信度"),  # English
+        ("path/to/mixed_image.png", "chinese_chinese", "混合识别置信度"),  # mixed
+    )
+
+    for image_path, lang_code, label in cases:
+        outcome = recognize_text(image=image_path, lang=lang_code)
+        # The confidence is printed without checking outcome.success.
+        print(f"{label}: {outcome.total_confidence:.2f}")
+
+
+def example_7_cloud_ocr():
+    """Example 7: cloud OCR (requires configuration)."""
+    print("\n示例 7: 云端 OCR")
+    print("-" * 50)
+
+    # Cloud OCR configuration.
+    cloud_settings = {
+        'api_endpoint': 'https://api.example.com/ocr',
+        'api_key': 'your_api_key_here',
+        'provider': 'custom',
+        'timeout': 30,
+    }
+    engine = CloudOCREngine(cloud_settings)
+
+    # Note: cloud OCR needs the _send_request method implemented for the specific API.
+    outcome = engine.recognize("path/to/image.png")
+
+    if not outcome.success:
+        print(f"云端 OCR 尚未实现: {outcome.error_message}")
+        return
+
+    print(f"识别成功: {outcome.full_text}")
+
+
+def example_8_factory_pattern():
+    """Example 8: create engines through the factory."""
+    print("\n示例 8: 使用工厂模式创建引擎")
+    print("-" * 50)
+
+    # Local engine.
+    local_engine = OCRFactory.create_engine(mode="local", config={'lang': 'ch'})
+    local_type_name = type(local_engine).__name__
+    print(f"本地引擎类型: {local_type_name}")
+
+    # Cloud engine.
+    cloud_config = {'api_endpoint': 'https://api.example.com/ocr'}
+    cloud_engine = OCRFactory.create_engine(mode="cloud", config=cloud_config)
+    cloud_type_name = type(cloud_engine).__name__
+    print(f"云端引擎类型: {cloud_type_name}")
+
+
+def example_9_detailed_result():
+    """Example 9: work with the detailed recognition result.
+
+    Prints the text, confidence and (when present) bounding box of every
+    recognized line, followed by summary statistics. Nothing is printed
+    after the header when recognition fails.
+    """
+    print("\n示例 9: 处理详细识别结果")
+    print("-" * 50)
+
+    result = recognize_text(
+        image="path/to/image.png",
+        mode="local",
+        lang="ch"
+    )
+
+    if not result.success:
+        return
+
+    # Walk through the per-line results.
+    for line_result in result.results:
+        print(f"\n行 {line_result.line_index}:")
+        print(f"  文本: {line_result.text}")
+        print(f"  置信度: {line_result.confidence:.2f}")
+
+        # Bounding box, if coordinate information is available.
+        if line_result.bbox:
+            print(f"  坐标: {line_result.bbox}")
+
+    # Summary statistics. If results is empty (presumably possible for an
+    # image with no text), report 0.0 rather than dividing by zero.
+    line_count = len(result.results)
+    total_chars = sum(len(r.text) for r in result.results)
+    if line_count:
+        avg_confidence = sum(r.confidence for r in result.results) / line_count
+    else:
+        avg_confidence = 0.0
+
+    print("\n统计:")
+    print(f"  总行数: {line_count}")
+    print(f"  总字符数: {total_chars}")
+    print(f"  平均置信度: {avg_confidence:.2f}")
+
+
+def example_10_pil_image_input():
+    """Example 10: use a PIL Image as input.
+
+    Opens an image file, crops a region of interest and passes the cropped
+    PIL Image object straight to ``recognize_text``.
+    """
+    print("\n示例 10: 使用 PIL Image 作为输入")
+    print("-" * 50)
+
+    from PIL import Image
+
+    # Open the image in a context manager so the underlying file handle is
+    # released even if cropping raises.
+    with Image.open("path/to/image.png") as pil_image:
+        # Crop the region of interest. crop() loads the pixel data, so the
+        # cropped image remains usable after the source file is closed.
+        cropped = pil_image.crop((100, 100, 500, 300))
+
+    # Recognize the PIL Image directly.
+    result = recognize_text(
+        image=cropped,  # pass the PIL Image object itself
+        mode="local",
+        lang="ch"
+    )
+
+    if result.success:
+        print(f"识别结果: {result.full_text}")
+
+
+if __name__ == '__main__':
+    # Banner: title, then usage notes framed by two rules of "=".
+    rule = "=" * 50
+    banner_lines = (
+        "OCR 模块使用示例",
+        rule,
+        "\n注意：运行这些示例前，请确保:",
+        "1. 安装依赖: pip install paddleocr paddlepaddle",
+        "2. 将示例中的 'path/to/image.png' 替换为实际图片路径",
+        rule,
+    )
+    for banner_line in banner_lines:
+        print(banner_line)
+
+    # Uncomment the examples you want to run.
+    # example_1_quick_recognize()
+    # example_2_with_preprocess()
+    # example_3_engine_directly()
+    # example_4_batch_images()
+    # example_5_image_preprocess()
+    # example_6_multilanguage()
+    # example_7_cloud_ocr()
+    # example_8_factory_pattern()
+    # example_9_detailed_result()
+    # example_10_pil_image_input()