feat: 实现CutThenThink P0阶段核心功能
项目初始化 - 创建完整项目结构(src/, data/, docs/, examples/, tests/) - 配置requirements.txt依赖 - 创建.gitignore P0基础框架 - 数据库模型:Record模型,6种分类类型 - 配置管理:YAML配置,支持AI/OCR/云存储/UI配置 - OCR模块:PaddleOCR本地识别,支持云端扩展 - AI模块:支持OpenAI/Claude/通义/Ollama,6种分类 - 存储模块:完整CRUD,搜索,统计,导入导出 - 主窗口框架:侧边栏导航,米白配色方案 - 图片处理:截图/剪贴板/文件选择/图片预览 - 处理流程整合:OCR→AI→存储串联,Markdown展示,剪贴板复制 - 分类浏览:卡片网格展示,分类筛选,搜索,详情查看 技术栈 - PyQt6 + SQLAlchemy + PaddleOCR + OpenAI/Claude SDK - 共47个Python文件,4000+行代码 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
185
tests/test_ocr.py
Normal file
185
tests/test_ocr.py
Normal file
@@ -0,0 +1,185 @@
|
||||
"""
|
||||
OCR 模块测试脚本
|
||||
|
||||
用法:
|
||||
python test_ocr.py --image <图片路径> [--lang ch] [--gpu]
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
# 添加项目根目录到路径
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from src.core.ocr import (
|
||||
recognize_text,
|
||||
preprocess_image,
|
||||
PaddleOCREngine,
|
||||
CloudOCREngine,
|
||||
ImagePreprocessor,
|
||||
OCRLanguage
|
||||
)
|
||||
|
||||
|
||||
def test_ocr_basic(image_path: str, lang: str = "ch", use_gpu: bool = False):
    """Run local OCR on one image without preprocessing and print a report.

    Calls ``recognize_text`` with ``mode="local"`` and ``preprocess=False``,
    then prints the success flag, the number of result lines and
    ``total_confidence`` (labelled as the average confidence). On success the
    full text and up to five per-line entries are printed; on failure the
    error message is printed instead.

    Args:
        image_path: Path of the image handed to ``recognize_text``.
        lang: Language code forwarded as ``lang``.
        use_gpu: Forwarded as ``use_gpu``.
    """
    # NOTE(review): the ``test_`` prefix plus a required ``image_path``
    # argument presumably makes pytest collect this function and then fail on
    # a missing fixture — confirm how this script is meant to be run.
    banner = "=" * 60
    print(f"\n{banner}")
    print("测试基本 OCR 识别")
    print(banner)
    print(f"图片路径: {image_path}")
    print(f"语言: {lang}")
    print(f"GPU: {use_gpu}")

    result = recognize_text(
        image=image_path,
        mode="local",
        lang=lang,
        use_gpu=use_gpu,
        preprocess=False,
    )

    print("\n识别结果:")
    print(f" 成功: {result.success}")
    print(f" 识别行数: {len(result.results)}")
    print(f" 平均置信度: {result.total_confidence:.2f}")

    if not result.success:
        print(f"\n错误: {result.error_message}")
        return

    rule = "-" * 60
    print("\n完整文本:")
    print(rule)
    print(result.full_text)
    print(rule)

    # Show details for the first 5 lines only.
    print("\n前 5 行详细信息:")
    preview_limit = 50
    for i, r in enumerate(result.results[:5]):
        preview = r.text[:preview_limit]
        # Only mark the preview as truncated when text was actually cut off;
        # an unconditional "..." is misleading for short lines.
        if len(r.text) > preview_limit:
            preview += "..."
        print(f" [{i}] {preview} (置信度: {r.confidence:.2f})")
|
||||
|
||||
|
||||
def test_ocr_with_preprocess(image_path: str, lang: str = "ch"):
    """Run local OCR on one image with preprocessing enabled and print a report.

    Calls ``recognize_text`` with ``mode="local"`` and ``preprocess=True``,
    then prints the success flag, the number of result lines and
    ``total_confidence`` (labelled as the average confidence). On success the
    full text is printed; on failure the error message is printed.

    Args:
        image_path: Path of the image handed to ``recognize_text``.
        lang: Language code forwarded as ``lang``.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("测试带预处理的 OCR 识别")
    print(banner)

    result = recognize_text(
        image=image_path,
        mode="local",
        lang=lang,
        preprocess=True,
    )

    print("\n识别结果:")
    print(f" 成功: {result.success}")
    print(f" 识别行数: {len(result.results)}")
    print(f" 平均置信度: {result.total_confidence:.2f}")

    if result.success:
        rule = "-" * 60
        print("\n完整文本:")
        print(rule)
        print(result.full_text)
        print(rule)
    else:
        # Report the failure reason, matching test_ocr_basic, instead of
        # ending silently when recognition fails.
        print(f"\n错误: {result.error_message}")
|
||||
|
||||
|
||||
def test_preprocess(image_path: str, output_dir: str = None):
    """Apply several preprocessing option sets to one image and save each result.

    For every named option set, ``preprocess_image`` is called with the image
    path, an ``output_path`` inside the output directory, and the options as
    keyword arguments. The saved path and the ``size`` of the returned object
    are printed; any exception is caught and reported so the remaining
    option sets still run.

    Args:
        image_path: Path of the source image.
        output_dir: Directory for the processed files. When ``None``, a
            ``processed`` directory next to the image is used. The directory
            is created if it does not exist.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("测试图像预处理功能")
    print(banner)

    if output_dir is not None:
        output_dir = Path(output_dir)
    else:
        output_dir = Path(image_path).parent / "processed"
    output_dir.mkdir(parents=True, exist_ok=True)

    # Preprocessing combinations to try, in order: (label, keyword options).
    variants = [
        ("原始图像", {}),
        ("调整大小", {"resize": True}),
        ("增强对比度", {"enhance_contrast": True}),
        ("增强锐度", {"enhance_sharpness": True}),
        ("去噪", {"denoise": True}),
        ("二值化", {"binarize": True}),
        (
            "综合增强",
            {
                "resize": True,
                "enhance_contrast": True,
                "enhance_sharpness": True,
            },
        ),
    ]

    for label, options in variants:
        print(f"\n处理: {label}")
        file_name = f"{Path(image_path).stem}_{label.replace(' ', '_')}.jpg"
        destination = output_dir / file_name

        # Best-effort: one failing variant must not stop the others.
        try:
            processed = preprocess_image(
                image_path,
                output_path=str(destination),
                **options,
            )
            print(f" 保存到: {destination}")
            print(f" 尺寸: {processed.size}")
        except Exception as exc:
            print(f" 失败: {exc}")
|
||||
|
||||
|
||||
def test_engine_directly():
    """Construct a ``PaddleOCREngine`` directly and print its class name.

    The engine is built from a fixed configuration (``lang='ch'``,
    ``use_gpu=False``, ``show_log=False``); no recognition is performed.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("测试直接使用 OCR 引擎")
    print(banner)

    # Configuration passed to the PaddleOCR engine constructor.
    engine_config = dict(lang='ch', use_gpu=False, show_log=False)

    print("\n创建 PaddleOCR 引擎...")
    engine = PaddleOCREngine(engine_config)
    engine_type_name = type(engine).__name__
    print(f"引擎类型: {engine_type_name}")
|
||||
|
||||
|
||||
def main(argv=None):
    """Parse command-line options and run the selected OCR checks.

    The engine-creation check always runs first. With ``--engine-only``
    nothing else runs. Otherwise, when ``--image`` is given and the file
    exists, either only the preprocessing check runs (``--preprocess-only``)
    or the basic OCR, preprocessed OCR and preprocessing checks run in that
    order. Without ``--image`` a usage hint is printed.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, as before.
    """
    parser = argparse.ArgumentParser(description="OCR 模块测试")
    parser.add_argument('--image', type=str, help='图片路径')
    # NOTE(review): 'chinese_chinese' is not a PaddleOCR language code I
    # recognize (traditional Chinese is 'chinese_cht') — confirm against
    # OCRLanguage before changing this help text.
    parser.add_argument('--lang', type=str, default='ch', help='语言 (ch/en/chinese_chinese)')
    parser.add_argument('--gpu', action='store_true', help='使用 GPU')
    parser.add_argument('--preprocess-only', action='store_true', help='仅测试预处理')
    parser.add_argument('--engine-only', action='store_true', help='仅测试引擎创建')

    args = parser.parse_args(argv)

    # Engine creation check.
    test_engine_directly()

    # Honor --engine-only: previously the flag was parsed but never read, so
    # the image checks still ran when it was given.
    if args.engine_only:
        return

    if not args.image:
        print("\n提示: 使用 --image <图片路径> 来测试 OCR 识别功能")
        print("示例: python test_ocr.py --image /path/to/image.png")
        return

    if not Path(args.image).exists():
        print(f"\n错误: 图片不存在: {args.image}")
        return

    if args.preprocess_only:
        # Preprocessing check only.
        test_preprocess(args.image)
        return

    # Basic OCR, then OCR with preprocessing, then the preprocessing check.
    test_ocr_basic(args.image, args.lang, args.gpu)
    test_ocr_with_preprocess(args.image, args.lang)
    test_preprocess(args.image)
|
||||
|
||||
|
||||
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user