Files
cutThenThink/examples/ocr_example.py

287 lines
7.8 KiB
Python
Raw Normal View History

"""
OCR 模块使用示例
演示如何使用 OCR 模块进行文字识别
"""
# 导入 OCR 模块
from src.core.ocr import (
recognize_text,
preprocess_image,
PaddleOCREngine,
CloudOCREngine,
OCRFactory,
ImagePreprocessor,
OCRResult,
OCRLanguage
)
def example_1_quick_recognize():
    """Example 1: quick text recognition (the simplest entry point).

    Calls ``recognize_text`` once on a placeholder image path with
    ``mode="local"``, ``lang="ch"``, GPU disabled and preprocessing disabled,
    then prints a short summary on success or the reported error message on
    failure.
    """
    print("示例 1: 快速识别文本")
    print("-" * 50)

    options = {
        "image": "path/to/your/image.png",
        "mode": "local",      # local recognition
        "lang": "ch",         # Chinese
        "use_gpu": False,     # do not use the GPU
        "preprocess": False,  # skip image preprocessing
    }
    outcome = recognize_text(**options)

    # Handle the failure case first so the success path reads straight down.
    if not outcome.success:
        print(f"识别失败: {outcome.error_message}")
        return

    print("识别成功!")
    print(f"平均置信度: {outcome.total_confidence:.2f}")
    print(f"识别行数: {len(outcome.results)}")
    print(f"完整文本:\n{outcome.full_text}")
def example_2_with_preprocess():
    """Example 2: recognition with preprocessing (for low-quality images).

    Same call as the quick example, but with ``preprocess=True``. Prints the
    full recognized text on success and prints nothing further on failure.
    """
    print("\n示例 2: 带预处理的识别")
    print("-" * 50)

    outcome = recognize_text(
        image="path/to/your/image.png",
        mode="local",
        lang="ch",
        # Enable preprocessing (per the original note: contrast, sharpness, ...).
        preprocess=True,
    )

    if not outcome.success:
        return

    print("识别成功!")
    print(f"完整文本:\n{outcome.full_text}")
def example_3_engine_directly():
    """Example 3: use the OCR engine class directly.

    Builds a ``PaddleOCREngine`` from an explicit config dict, runs
    ``recognize`` on a placeholder image path without preprocessing and, on
    success, prints the full text followed by one line per recognized row
    (index, text, confidence).
    """
    print("\n示例 3: 直接使用 OCR 引擎")
    print("-" * 50)

    # Engine settings: language, GPU usage and whether to show engine logs.
    engine = PaddleOCREngine(dict(lang='ch', use_gpu=False, show_log=False))

    # Run recognition on the image.
    outcome = engine.recognize(
        image="path/to/your/image.png",
        preprocess=False,
    )

    if not outcome.success:
        return

    print("识别成功!")
    print(f"完整文本:\n{outcome.full_text}")

    # Walk through every recognized line.
    for row in outcome.results:
        index, text, score = row.line_index, row.text, row.confidence
        print(f"{index}: {text} (置信度: {score:.2f})")
def example_4_batch_images():
    """Example 4: process several images with one engine instance.

    Creates a single ``PaddleOCREngine`` and calls ``recognize`` on each
    placeholder path in turn, printing the confidence and the first 100
    characters of text on success, or the error message on failure.
    """
    print("\n示例 4: 批量处理多张图片")
    print("-" * 50)

    paths = (
        "path/to/image1.png",
        "path/to/image2.png",
        "path/to/image3.png",
    )
    engine = PaddleOCREngine({'lang': 'ch'})

    for position, path in enumerate(paths, start=1):
        print(f"\n处理图片 {position}: {path}")
        outcome = engine.recognize(path)

        if not outcome.success:
            print(f" 识别失败: {outcome.error_message}")
            continue

        print(f" 识别成功,置信度: {outcome.total_confidence:.2f}")
        # Only show a short preview of the recognized text.
        preview = outcome.full_text[:100]
        print(f" 文本预览: {preview}...")
def example_5_image_preprocess():
    """Example 5: preprocess an image, then run OCR on the result.

    Calls ``preprocess_image`` with resizing, contrast and sharpness
    enhancement enabled (denoising and binarization disabled), prints the
    ``size`` of the returned object, and passes that object straight to
    ``recognize_text``. The original comment states a PIL Image is accepted
    as ``image``.
    """
    print("\n示例 5: 图像预处理")
    print("-" * 50)

    # Which preprocessing steps to apply.
    steps = {
        "resize": True,             # resize the image
        "enhance_contrast": True,   # boost contrast
        "enhance_sharpness": True,  # boost sharpness
        "denoise": False,           # no denoising
        "binarize": False,          # no binarization
    }

    # Preprocess and save the processed image to the output path.
    processed = preprocess_image(
        image_path="path/to/input.png",
        output_path="path/to/output_processed.png",
        **steps,
    )
    print(f"预处理完成,图像尺寸: {processed.size}")

    # Run OCR on the preprocessed image object.
    outcome = recognize_text(image=processed, mode="local", lang="ch")
    if not outcome.success:
        return

    print(f"识别文本: {outcome.full_text}")
def example_6_multilanguage():
    """Example 6: recognition with different language settings.

    Runs ``recognize_text`` three times (Chinese, English, mixed), each on
    its own placeholder image, and prints the overall confidence of each
    result. The results are not checked for ``success`` before printing.
    """
    print("\n示例 6: 多语言识别")
    print("-" * 50)

    # (label used in the output line, image path, language code)
    # NOTE(review): "chinese_chinese" is the code the original uses for mixed
    # Chinese/English text; it looks unusual - confirm against OCRLanguage.
    cases = (
        ("中文", "path/to/chinese_image.png", "ch"),
        ("英文", "path/to/english_image.png", "en"),
        ("混合", "path/to/mixed_image.png", "chinese_chinese"),
    )

    for label, path, lang_code in cases:
        outcome = recognize_text(image=path, lang=lang_code)
        print(f"{label}识别置信度: {outcome.total_confidence:.2f}")
def example_7_cloud_ocr():
    """Example 7: cloud OCR (requires configuration).

    Builds a ``CloudOCREngine`` from a placeholder endpoint/key config and
    calls ``recognize`` on a placeholder image path. Prints the recognized
    text on success; otherwise prints the error message under a
    "not implemented yet" label.
    """
    print("\n示例 7: 云端 OCR")
    print("-" * 50)

    # Cloud OCR settings; replace the placeholders with real values.
    settings = dict(
        api_endpoint='https://api.example.com/ocr',
        api_key='your_api_key_here',
        provider='custom',
        timeout=30,
    )
    engine = CloudOCREngine(settings)

    # Note (from the original): cloud OCR needs a _send_request
    # implementation that matches the concrete API being used.
    outcome = engine.recognize("path/to/image.png")

    if not outcome.success:
        print(f"云端 OCR 尚未实现: {outcome.error_message}")
        return

    print(f"识别成功: {outcome.full_text}")
def example_8_factory_pattern():
    """Example 8: create engines through the factory.

    Calls ``OCRFactory.create_engine`` once with ``mode="local"`` and once
    with ``mode="cloud"``, printing the class name of each engine returned.
    """
    print("\n示例 8: 使用工厂模式创建引擎")
    print("-" * 50)

    # (label used in the output line, factory mode, engine config)
    engine_specs = (
        ("本地", "local", {'lang': 'ch'}),
        ("云端", "cloud", {'api_endpoint': 'https://api.example.com/ocr'}),
    )

    for label, mode, config in engine_specs:
        engine = OCRFactory.create_engine(mode=mode, config=config)
        print(f"{label}引擎类型: {type(engine).__name__}")
def example_9_detailed_result():
    """Example 9: inspect the detailed, per-line recognition result.

    Runs ``recognize_text`` on a placeholder image path and, on success,
    prints every recognized line (index, text, confidence and, when present,
    its bounding box) followed by summary statistics: line count, total
    character count and mean confidence.

    A successful result with zero recognized lines reports a mean confidence
    of 0.00 instead of raising ``ZeroDivisionError``.
    """
    print("\n示例 9: 处理详细识别结果")
    print("-" * 50)

    result = recognize_text(
        image="path/to/image.png",
        mode="local",
        lang="ch",
    )

    if not result.success:
        return

    lines = result.results

    # Per-line details.
    for line_result in lines:
        print(f"\n{line_result.line_index}:")
        print(f" 文本: {line_result.text}")
        print(f" 置信度: {line_result.confidence:.2f}")
        # Only print coordinates when the line carries them.
        if line_result.bbox:
            print(f" 坐标: {line_result.bbox}")

    # Summary statistics.
    line_count = len(lines)
    total_chars = sum(len(r.text) for r in lines)
    # Guard the division: an image without text can yield zero lines.
    if line_count:
        avg_confidence = sum(r.confidence for r in lines) / line_count
    else:
        avg_confidence = 0.0

    print("\n统计:")
    print(f" 总行数: {line_count}")
    print(f" 总字符数: {total_chars}")
    print(f" 平均置信度: {avg_confidence:.2f}")
def example_10_pil_image_input():
    """Example 10: pass a PIL ``Image`` object as the OCR input.

    Opens a placeholder image file, crops a region of interest and hands the
    cropped image directly to ``recognize_text``. The file is opened in a
    ``with`` block so its handle is closed once recognition finishes.
    Prints the recognized text on success.
    """
    print("\n示例 10: 使用 PIL Image 作为输入")
    print("-" * 50)

    from PIL import Image

    # Keep the source image open while the crop is consumed, so this works
    # even if the crop still references the source pixel data; the context
    # manager then closes the underlying file.
    with Image.open("path/to/image.png") as pil_image:
        # Crop the region of interest: (left, upper, right, lower).
        cropped = pil_image.crop((100, 100, 500, 300))

        # Recognize the PIL Image object directly.
        result = recognize_text(
            image=cropped,
            mode="local",
            lang="ch",
        )

    if result.success:
        print(f"识别结果: {result.full_text}")
if __name__ == '__main__':
    # Script entry point: print a usage banner. No example runs by default.
    banner = "=" * 50

    print("OCR 模块使用示例")
    print(banner)
    for notice in (
        "\n注意:运行这些示例前,请确保:",
        "1. 安装依赖: pip install paddleocr paddlepaddle",
        "2. 将示例中的 'path/to/image.png' 替换为实际图片路径",
    ):
        print(notice)
    print(banner)

    # Uncomment the examples you want to run.
    # example_1_quick_recognize()
    # example_2_with_preprocess()
    # example_3_engine_directly()
    # example_4_batch_images()
    # example_5_image_preprocess()
    # example_6_multilanguage()
    # example_7_cloud_ocr()
    # example_8_factory_pattern()
    # example_9_detailed_result()
    # example_10_pil_image_input()