# cutThenThink/examples/ocr_example.py

"""
OCR 模块使用示例

演示如何使用 OCR 模块进行文字识别
"""

# 导入 OCR 模块
from src.core.ocr import (
    recognize_text,
    preprocess_image,
    PaddleOCREngine,
    CloudOCREngine,
    OCRFactory,
    ImagePreprocessor,
    OCRResult,
    OCRLanguage
)


def example_1_quick_recognize():
    """Example 1: quick text recognition (the simplest entry point).

    Calls ``recognize_text`` on a placeholder image path with
    ``mode="local"``, ``lang="ch"``, GPU disabled and preprocessing
    disabled, then prints either a summary of the result or the error
    message carried by the result object.
    """
    print("示例 1: 快速识别文本")
    print("-" * 50)

    options = {
        "image": "path/to/your/image.png",
        "mode": "local",      # local recognition
        "lang": "ch",         # Chinese
        "use_gpu": False,     # do not use the GPU
        "preprocess": False,  # skip preprocessing
    }
    outcome = recognize_text(**options)

    # Guard clause: report the failure and stop here.
    if not outcome.success:
        print(f"识别失败: {outcome.error_message}")
        return

    print("识别成功！")
    print(f"平均置信度: {outcome.total_confidence:.2f}")
    print(f"识别行数: {len(outcome.results)}")
    print(f"完整文本:\n{outcome.full_text}")


def example_2_with_preprocess():
    """Example 2: recognition with preprocessing (for low-quality images).

    Calls ``recognize_text`` with ``preprocess=True`` on a placeholder image
    path and prints the full recognized text. When recognition fails, the
    error message from the result object is printed instead of the function
    finishing silently.
    """
    print("\n示例 2: 带预处理的识别")
    print("-" * 50)

    result = recognize_text(
        image="path/to/your/image.png",
        mode="local",
        lang="ch",
        preprocess=True,  # enable preprocessing (contrast, sharpness, etc.)
    )

    if result.success:
        print("识别成功！")
        print(f"完整文本:\n{result.full_text}")
    else:
        # Surface the failure so the user is not left without any output.
        print(f"识别失败: {result.error_message}")


def example_3_engine_directly():
    """Example 3: use the OCR engine class directly.

    Builds a ``PaddleOCREngine`` from an explicit config dict, runs
    ``recognize`` on a placeholder image path without preprocessing, and
    prints the full text followed by each recognized line with its
    confidence. When recognition fails, the error message from the result
    object is printed instead of the function finishing silently.
    """
    print("\n示例 3: 直接使用 OCR 引擎")
    print("-" * 50)

    # Engine configuration
    config = {
        'lang': 'ch',       # language
        'use_gpu': False,   # whether to use the GPU
        'show_log': False,  # whether to show engine logs
    }

    engine = PaddleOCREngine(config)

    # Recognize the image
    result = engine.recognize(
        image="path/to/your/image.png",
        preprocess=False,
    )

    if not result.success:
        # Surface the failure so the user is not left without any output.
        print(f"识别失败: {result.error_message}")
        return

    print("识别成功！")
    print(f"完整文本:\n{result.full_text}")

    # Walk through every recognized line
    for line_result in result.results:
        print(f"行 {line_result.line_index}: {line_result.text} (置信度: {line_result.confidence:.2f})")


def example_4_batch_images():
    """Example 4: process several images in one batch.

    Creates a single ``PaddleOCREngine`` with ``lang='ch'`` and reuses it for
    every path in the list. For each image it prints the overall confidence
    and the first 100 characters of the text, or the error message when
    recognition fails.
    """
    print("\n示例 4: 批量处理多张图片")
    print("-" * 50)

    paths = [
        "path/to/image1.png",
        "path/to/image2.png",
        "path/to/image3.png",
    ]

    ocr = PaddleOCREngine({'lang': 'ch'})

    for number, path in enumerate(paths, start=1):
        print(f"\n处理图片 {number}: {path}")
        outcome = ocr.recognize(path)

        # Report a failed image and move on to the next one.
        if not outcome.success:
            print(f"  识别失败: {outcome.error_message}")
            continue

        preview = outcome.full_text[:100]
        print(f"  识别成功，置信度: {outcome.total_confidence:.2f}")
        print(f"  文本预览: {preview}...")


def example_5_image_preprocess():
    """Example 5: preprocess an image, then run OCR on the result.

    Calls ``preprocess_image`` with resizing, contrast enhancement and
    sharpness enhancement enabled (denoising and binarization disabled),
    prints the ``size`` of the returned image, and passes that image object
    to ``recognize_text``. When recognition fails, the error message from
    the result object is printed instead of the function finishing silently.
    """
    print("\n示例 5: 图像预处理")
    print("-" * 50)

    # Preprocess and save
    # NOTE(review): presumably the processed image is written to
    # ``output_path`` — confirm against preprocess_image.
    processed = preprocess_image(
        image_path="path/to/input.png",
        output_path="path/to/output_processed.png",
        resize=True,              # resize
        enhance_contrast=True,    # enhance contrast
        enhance_sharpness=True,   # enhance sharpness
        denoise=False,            # no denoising
        binarize=False,           # no binarization
    )

    print(f"预处理完成，图像尺寸: {processed.size}")

    # Run OCR on the preprocessed image
    result = recognize_text(
        image=processed,  # an image object can be passed instead of a path
        mode="local",
        lang="ch",
    )

    if result.success:
        print(f"识别文本: {result.full_text}")
    else:
        # Surface the failure so the user is not left without any output.
        print(f"识别失败: {result.error_message}")


def example_6_multilanguage():
    """Example 6: recognition with different language settings.

    Runs ``recognize_text`` three times, in order, for a Chinese image, an
    English image and a mixed Chinese/English image, printing the overall
    confidence reported for each.
    """
    print("\n示例 6: 多语言识别")
    print("-" * 50)

    # (output label, image path, language code), processed in this order.
    # NOTE(review): "chinese_chinese" is not a stock PaddleOCR language code;
    # confirm that src.core.ocr accepts or maps it for mixed text.
    cases = (
        ("中文识别置信度", "path/to/chinese_image.png", "ch"),              # Chinese
        ("英文识别置信度", "path/to/english_image.png", "en"),              # English
        ("混合识别置信度", "path/to/mixed_image.png", "chinese_chinese"),   # mixed
    )

    for label, image_path, lang_code in cases:
        outcome = recognize_text(image=image_path, lang=lang_code)
        print(f"{label}: {outcome.total_confidence:.2f}")


def example_7_cloud_ocr():
    """Example 7: cloud OCR (requires configuration).

    Builds a ``CloudOCREngine`` from a placeholder endpoint/key config and
    calls ``recognize`` on a placeholder image path, printing the recognized
    text or the error message from the result object.
    """
    print("\n示例 7: 云端 OCR")
    print("-" * 50)

    # Cloud OCR settings (placeholders)
    cloud_settings = dict(
        api_endpoint='https://api.example.com/ocr',
        api_key='your_api_key_here',
        provider='custom',
        timeout=30,
    )

    cloud = CloudOCREngine(cloud_settings)

    # Note from the original author: cloud OCR requires implementing the
    # _send_request method for the specific API in use.
    outcome = cloud.recognize("path/to/image.png")

    if not outcome.success:
        print(f"云端 OCR 尚未实现: {outcome.error_message}")
        return

    print(f"识别成功: {outcome.full_text}")


def example_8_factory_pattern():
    """Example 8: create engines through the factory.

    Asks ``OCRFactory.create_engine`` for a ``"local"`` engine and then a
    ``"cloud"`` engine, printing the class name of each returned object.
    """
    print("\n示例 8: 使用工厂模式创建引擎")
    print("-" * 50)

    # Local engine
    local_config = {'lang': 'ch'}
    local_engine = OCRFactory.create_engine(mode="local", config=local_config)
    local_kind = type(local_engine).__name__
    print(f"本地引擎类型: {local_kind}")

    # Cloud engine
    cloud_config = {'api_endpoint': 'https://api.example.com/ocr'}
    cloud_engine = OCRFactory.create_engine(mode="cloud", config=cloud_config)
    cloud_kind = type(cloud_engine).__name__
    print(f"云端引擎类型: {cloud_kind}")


def example_9_detailed_result():
    """Example 9: work with the detailed recognition result.

    Runs ``recognize_text`` on a placeholder image path, prints every
    recognized line (text, confidence and, when present, its bounding box),
    and then prints summary statistics: line count, total character count
    and mean per-line confidence.

    The mean confidence is reported as 0.0 when recognition succeeds but
    yields no lines, instead of raising ``ZeroDivisionError``. When
    recognition fails, the error message from the result object is printed.
    """
    print("\n示例 9: 处理详细识别结果")
    print("-" * 50)

    result = recognize_text(
        image="path/to/image.png",
        mode="local",
        lang="ch",
    )

    if not result.success:
        # Surface the failure so the user is not left without any output.
        print(f"识别失败: {result.error_message}")
        return

    lines = result.results

    # Walk through every recognized line
    for line_result in lines:
        print(f"\n行 {line_result.line_index}:")
        print(f"  文本: {line_result.text}")
        print(f"  置信度: {line_result.confidence:.2f}")

        # Only print coordinates when the line carries them
        if line_result.bbox:
            print(f"  坐标: {line_result.bbox}")

    # Summary statistics
    total_chars = sum(len(r.text) for r in lines)
    # A successful result may contain zero lines (e.g. a blank image);
    # guard the division so the example does not crash in that case.
    if lines:
        avg_confidence = sum(r.confidence for r in lines) / len(lines)
    else:
        avg_confidence = 0.0

    print("\n统计:")
    print(f"  总行数: {len(lines)}")
    print(f"  总字符数: {total_chars}")
    print(f"  平均置信度: {avg_confidence:.2f}")


def example_10_pil_image_input():
    """Example 10: pass a PIL Image object as the OCR input.

    Opens a placeholder image with Pillow, crops the region
    ``(100, 100, 500, 300)``, and passes the cropped image object to
    ``recognize_text``. The source file is opened in a ``with`` block so its
    handle is released once the crop has been taken. When recognition fails,
    the error message from the result object is printed.
    """
    print("\n示例 10: 使用 PIL Image 作为输入")
    print("-" * 50)

    from PIL import Image

    # Load the image and crop the region of interest. The context manager
    # closes the underlying file; the cropped image is a separate object
    # that remains usable afterwards.
    with Image.open("path/to/image.png") as pil_image:
        cropped = pil_image.crop((100, 100, 500, 300))

    # Recognize the PIL Image directly
    result = recognize_text(
        image=cropped,  # pass the PIL Image object itself
        mode="local",
        lang="ch",
    )

    if result.success:
        print(f"识别结果: {result.full_text}")
    else:
        # Surface the failure so the user is not left without any output.
        print(f"识别失败: {result.error_message}")


if __name__ == '__main__':
    # Print the banner and the prerequisites before any example runs.
    rule = "=" * 50
    banner_lines = (
        "OCR 模块使用示例",
        rule,
        "\n注意：运行这些示例前，请确保:",
        "1. 安装依赖: pip install paddleocr paddlepaddle",
        "2. 将示例中的 'path/to/image.png' 替换为实际图片路径",
        rule,
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Uncomment the examples you want to run.
    # example_1_quick_recognize()
    # example_2_with_preprocess()
    # example_3_engine_directly()
    # example_4_batch_images()
    # example_5_image_preprocess()
    # example_6_multilanguage()
    # example_7_cloud_ocr()
    # example_8_factory_pattern()
    # example_9_detailed_result()
    # example_10_pil_image_input()