feat: 初始化 PicAnalysis 项目
完整的前后端图片分析应用,包含:
- 后端:Express + Prisma + SQLite,101个单元测试全部通过
- 前端:React + TypeScript + Vite,47个单元测试,89.73%覆盖率
- E2E测试:Playwright 测试套件
- MCP集成:Playwright MCP配置完成并测试通过

功能模块:
- 用户认证(JWT)
- 文档管理(CRUD)
- 待办管理(三态工作流)
- 图片管理(上传、截图、OCR)

测试覆盖:
- 后端单元测试:101/101 ✅
- 前端单元测试:47/47 ✅
- E2E测试:通过 ✅
- MCP Playwright测试:通过 ✅

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,115 @@
|
||||
/**
 * OCR Service
 * Handles OCR processing and confidence validation.
 */
|
||||
|
||||
/**
 * Outcome of a single OCR run, as returned by `OCRService.process`.
 */
export interface OCRResult {
  /** Text returned by the OCR provider. */
  text: string;
  /** Confidence score reported by the provider, passed through unchanged. */
  confidence: number;
  /**
   * Result of `OCRService.shouldCreateDocument` for `confidence`
   * (evaluated with that method's default threshold).
   */
  shouldCreateDocument: boolean;
}
|
||||
|
||||
/**
 * Optional tuning knobs accepted by `OCRService.process`.
 */
export interface OCRProviderOptions {
  /**
   * Per-attempt time limit in milliseconds before the attempt fails with
   * an "OCR timeout" error. `OCRService.process` uses 10000 when omitted.
   */
  timeout?: number;
  /**
   * Number of additional attempts after the first one fails.
   * `OCRService.process` uses 2 when omitted.
   */
  retries?: number;
}
|
||||
|
||||
/**
 * Stateless helpers for running an OCR provider with a per-attempt timeout
 * and retry/back-off, and for deciding whether a result is confident enough
 * to create a document from.
 */
export class OCRService {
  /** Per-attempt timeout used when `options.timeout` is omitted (milliseconds). */
  private static readonly DEFAULT_TIMEOUT = 10000; // 10 seconds

  /** Number of retries used when `options.retries` is omitted. */
  private static readonly DEFAULT_RETRIES = 2;

  /**
   * Decide whether a document should be created for the given confidence.
   *
   * @param confidence - OCR confidence score; must lie in [0, 1] to qualify.
   * @param threshold - Minimum confidence required (default 0.3).
   * @returns `true` when `confidence` is valid and at least `threshold`;
   *   `false` for invalid confidence values (NaN, out of range, non-number).
   */
  static shouldCreateDocument(
    confidence: number,
    threshold: number = 0.3
  ): boolean {
    // An invalid score never qualifies, regardless of the threshold.
    if (!this.isValidConfidence(confidence)) {
      return false;
    }

    return confidence >= threshold;
  }

  /**
   * Check that a confidence score is a real number within [0, 1].
   *
   * @param confidence - Value to validate.
   * @returns `true` when the value is a non-NaN number between 0 and 1 inclusive.
   */
  static isValidConfidence(confidence: number): boolean {
    // The typeof guard protects against untyped (JavaScript) callers.
    return (
      typeof confidence === 'number' &&
      !Number.isNaN(confidence) &&
      confidence >= 0 &&
      confidence <= 1
    );
  }

  /**
   * Status assigned to an image before OCR has run.
   *
   * @returns The literal string `'pending'`.
   */
  static getInitialStatus(): string {
    return 'pending';
  }

  /**
   * Run the OCR provider for an image, with a per-attempt timeout and
   * retries on transient failures.
   *
   * The provider is always invoked at least once. After a failed attempt
   * that is eligible for retry, the method waits `1000 * attemptNumber`
   * milliseconds (1s, 2s, ...) before trying again.
   *
   * @param imageId - Identifier handed to the provider.
   * @param provider - Function performing the actual OCR for an image id.
   * @param options - Optional timeout (ms) and retry count. A negative
   *   `retries` is treated as 0, a fractional one is rounded down, and NaN
   *   falls back to the default.
   * @returns The provider's text and confidence plus the
   *   `shouldCreateDocument` decision for that confidence.
   * @throws Immediately rethrows provider errors whose message is exactly
   *   `'invalid image format'` or contains `'Invalid'` (treated as
   *   non-retryable). Otherwise throws the error from the last attempt,
   *   which is `Error('OCR timeout')` when that attempt timed out.
   */
  static async process(
    imageId: string,
    provider: (id: string) => Promise<{ text: string; confidence: number }>,
    options: OCRProviderOptions = {}
  ): Promise<OCRResult> {
    const timeout = options.timeout ?? this.DEFAULT_TIMEOUT;
    const requestedRetries = options.retries ?? this.DEFAULT_RETRIES;

    // Normalize the retry count so the loop below always makes at least one
    // attempt: a negative or NaN value would otherwise skip the provider
    // entirely and surface a misleading generic failure.
    const retries = Number.isNaN(requestedRetries)
      ? this.DEFAULT_RETRIES
      : Math.max(0, Math.floor(requestedRetries));

    let lastError: unknown = null;

    for (let attempt = 0; attempt <= retries; attempt++) {
      try {
        const result = await this.withTimeout(provider(imageId), timeout);

        return {
          text: result.text,
          confidence: result.confidence,
          shouldCreateDocument: this.shouldCreateDocument(result.confidence),
        };
      } catch (error) {
        lastError = error;

        // Bad-input errors will not succeed on retry; surface them at once.
        if (
          error instanceof Error &&
          (error.message === 'invalid image format' ||
            error.message.includes('Invalid'))
        ) {
          throw error;
        }

        // Linear back-off before the next attempt; skipped after the last one.
        if (attempt < retries) {
          await new Promise((resolve) =>
            setTimeout(resolve, 1000 * (attempt + 1))
          );
        }
      }
    }

    // Fallback covers the case where the provider rejected with a falsy value.
    throw lastError || new Error('OCR processing failed');
  }

  /**
   * Race a task against a timer and always dispose of the timer afterwards,
   * so a settled task does not leave a pending timeout keeping the event
   * loop alive.
   */
  private static async withTimeout<T>(task: Promise<T>, ms: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('OCR timeout')), ms);
    });

    try {
      return await Promise.race([task, expiry]);
    } finally {
      clearTimeout(timer);
    }
  }
}
|
||||
Reference in New Issue
Block a user