Files
cutThenThink/tests/test_processor.py

243 lines
7.0 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Integration tests for the processing pipeline.
Exercises the complete OCR -> AI -> storage flow.
"""
import sys
import unittest
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
# Put the project root (the parent of this file's directory) at the front of
# sys.path so the `src.*` imports below can be resolved.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
from src.core.processor import (
ImageProcessor,
ProcessCallback,
ProcessResult,
create_markdown_result
)
from src.core.ocr import OCRBatchResult, OCRResult
from src.core.ai import ClassificationResult, CategoryType
class TestProcessResult(unittest.TestCase):
    """Checks for the ProcessResult data structure."""

    def test_create_result(self):
        """Values passed to the constructor are readable as attributes."""
        completed = ["ocr", "ai", "save"]
        outcome = ProcessResult(
            success=True,
            image_path="/test/image.png",
            process_time=1.5,
            steps_completed=completed,
        )

        self.assertTrue(outcome.success)
        self.assertEqual(outcome.image_path, "/test/image.png")
        self.assertEqual(outcome.process_time, 1.5)
        self.assertEqual(len(outcome.steps_completed), 3)

    def test_to_dict(self):
        """to_dict() yields a dict carrying the success flag and image path."""
        fields = {
            "success": True,
            "image_path": "/test/image.png",
            "process_time": 1.5,
            "steps_completed": ["ocr"],
        }
        as_dict = ProcessResult(**fields).to_dict()

        self.assertIsInstance(as_dict, dict)
        self.assertTrue(as_dict['success'])
        self.assertEqual(as_dict['image_path'], "/test/image.png")
class TestCreateMarkdownResult(unittest.TestCase):
    """Checks for the Markdown produced by create_markdown_result."""

    def test_with_ai_result(self):
        """With an AI result, title, content, category name and a tag appear."""
        classification = ClassificationResult(
            category=CategoryType.NOTE,
            confidence=0.95,
            title="测试标题",
            content="测试内容",
            tags=["标签1", "标签2"],
        )

        rendered = create_markdown_result(classification, "OCR 文本")

        # Checked in order; the first missing fragment fails the test.
        for fragment in ("测试标题", "测试内容", "NOTE", "标签1"):
            self.assertIn(fragment, rendered)

    def test_without_ai_result(self):
        """Without an AI result, the OCR text and the fallback heading appear."""
        rendered = create_markdown_result(None, "OCR 文本")

        for fragment in ("OCR 文本", "# 处理结果"):
            self.assertIn(fragment, rendered)
class TestProcessCallback(unittest.TestCase):
    """Checks for ProcessCallback."""

    def test_callback_methods(self):
        """Hooks replaced by mocks record the arguments they are called with."""
        hooks = ProcessCallback()

        # Swap three hooks for mocks so their invocations can be inspected.
        hooks.on_start = Mock()
        hooks.on_ocr_start = Mock()
        hooks.on_ai_complete = Mock()

        classification = ClassificationResult(
            category=CategoryType.TODO,
            confidence=0.9,
            title="TODO",
            content="内容",
            tags=[],
        )

        # Invoke each hook exactly once.
        hooks.on_start("测试")
        hooks.on_ocr_start("OCR 开始")
        hooks.on_ai_complete(classification)

        # Each mock must have seen exactly that single call.
        hooks.on_start.assert_called_once_with("测试")
        hooks.on_ocr_start.assert_called_once_with("OCR 开始")
        hooks.on_ai_complete.assert_called_once_with(classification)
class TestImageProcessor(unittest.TestCase):
    """Checks for ImageProcessor."""

    def setUp(self):
        """Prepare an OCR settings dict and a mocked AI configuration."""
        self.ocr_config = dict(mode='local', lang='ch', use_gpu=False)

        # Mock standing in for the AI configuration object.
        fake_ai_config = Mock(
            api_key="test_key",
            model="test_model",
            temperature=0.7,
            max_tokens=4096,
            timeout=60,
        )
        fake_ai_config.provider.value = "anthropic"
        self.ai_config = fake_ai_config

    def test_init_processor(self):
        """Constructing a processor keeps the OCR config that was passed in."""
        # init_database is patched out for the duration of the test body.
        with patch('src.core.processor.init_database'):
            processor = ImageProcessor(
                ocr_config=self.ocr_config,
                ai_config=self.ai_config,
                db_path=":memory:",
                callback=ProcessCallback(),
            )

            self.assertIsNotNone(processor)
            self.assertEqual(processor.ocr_config, self.ocr_config)

    def test_process_image_skip_all(self):
        """Process an image with OCR, AI and database saving all skipped."""
        with patch('src.core.processor.init_database'), \
                patch('src.core.processor.recognize_text') as mock_ocr:
            # The patched recognize_text returns an empty, successful batch.
            mock_ocr.return_value = OCRBatchResult(
                results=[],
                full_text="",
                total_confidence=0.0,
                success=True,
            )

            processor = ImageProcessor(
                ocr_config=self.ocr_config,
                ai_config=None,  # no AI configuration
                db_path=":memory:",
                callback=ProcessCallback(),
            )

            # Process the image (skipping both OCR and AI).
            outcome = processor.process_image(
                image_path="/test/fake.png",
                skip_ocr=True,
                skip_ai=True,
                save_to_db=False,
            )

            self.assertIsNotNone(outcome)
            self.assertEqual(outcome.image_path, "/test/fake.png")
class TestIntegration(unittest.TestCase):
    """Integration-style checks."""

    def test_full_workflow_mock(self):
        """Format hand-built OCR and AI results into Markdown (using mocks)."""
        # Hand-built OCR batch with two recognized lines.
        recognized_lines = [
            OCRResult(text="第一行文本", confidence=0.95, line_index=0),
            OCRResult(text="第二行文本", confidence=0.90, line_index=1),
        ]
        ocr_batch = OCRBatchResult(
            results=recognized_lines,
            full_text="第一行文本\n第二行文本",
            total_confidence=0.925,
            success=True,
        )

        # Hand-built AI classification result.
        classification = ClassificationResult(
            category=CategoryType.NOTE,
            confidence=0.95,
            title="测试笔记",
            content="## 笔记内容\n\n- 要点1\n- 要点2",
            tags=["测试", "笔记"],
        )

        # The rendered Markdown must mention title, category and content.
        rendered = create_markdown_result(classification, ocr_batch.full_text)
        for fragment in ("测试笔记", "NOTE", "笔记内容"):
            self.assertIn(fragment, rendered)
def run_tests():
    """Run every test case of this module through a verbose text runner.

    Returns:
        True when all tests passed, False otherwise.
    """
    case_classes = (
        TestProcessResult,
        TestCreateMarkdownResult,
        TestProcessCallback,
        TestImageProcessor,
        TestIntegration,
    )

    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    # Load the cases in the listed order so they run in that order.
    for case_class in case_classes:
        suite.addTests(loader.loadTestsFromTestCase(case_class))

    outcome = unittest.TextTestRunner(verbosity=2).run(suite)
    return outcome.wasSuccessful()
if __name__ == "__main__":
    # Exit status 0 when every test passed, 1 otherwise.
    exit_code = 0 if run_tests() else 1
    sys.exit(exit_code)