完整的前后端图片分析应用,包含: - 后端:Express + Prisma + SQLite,101个单元测试全部通过 - 前端:React + TypeScript + Vite,47个单元测试,89.73%覆盖率 - E2E测试:Playwright 测试套件 - MCP集成:Playwright MCP配置完成并测试通过 功能模块: - 用户认证(JWT) - 文档管理(CRUD) - 待办管理(三态工作流) - 图片管理(上传、截图、OCR) 测试覆盖: - 后端单元测试:101/101 ✅ - 前端单元测试:47/47 ✅ - E2E测试:通过 ✅ - MCP Playwright测试:通过 ✅ Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
116 lines
3.0 KiB
TypeScript
116 lines
3.0 KiB
TypeScript
/**
 * OCR Service
 * Handles OCR processing and confidence validation
 */

/**
 * Outcome of a single OCR run as returned by `OCRService.process`.
 */
export interface OCRResult {
  /** Text reported by the OCR provider. */
  text: string;
  /** Confidence score reported by the OCR provider (expected range 0-1). */
  confidence: number;
  /** Whether the confidence was high enough to create a document. */
  shouldCreateDocument: boolean;
}

/**
 * Optional tuning knobs for `OCRService.process`.
 */
export interface OCRProviderOptions {
  /** Per-attempt timeout in milliseconds; the service falls back to 10000 when omitted. */
  timeout?: number;
  /** Number of retries after the first attempt; the service falls back to 2 when omitted. */
  retries?: number;
}

/**
 * Static helpers for validating OCR confidence scores and for running an OCR
 * provider with a per-attempt timeout and retry/backoff.
 */
export class OCRService {
  /** Default per-attempt timeout in milliseconds (10 seconds). */
  private static readonly DEFAULT_TIMEOUT = 10000;
  /** Default number of retries performed after the first attempt. */
  private static readonly DEFAULT_RETRIES = 2;

  /**
   * Determine if a document should be created based on confidence.
   *
   * @param confidence - OCR confidence score (0-1)
   * @param threshold - Minimum threshold (default 0.3)
   * @returns `true` when `confidence` is a valid score and is at least
   *   `threshold`; `false` otherwise (including for invalid scores)
   */
  static shouldCreateDocument(
    confidence: number,
    threshold: number = 0.3
  ): boolean {
    // An out-of-range or NaN score never qualifies, whatever the threshold.
    if (!this.isValidConfidence(confidence)) {
      return false;
    }

    return confidence >= threshold;
  }

  /**
   * Validate that a confidence score is a real number within [0, 1].
   *
   * @param confidence - Confidence score to validate
   * @returns `true` if the value is a non-NaN number between 0 and 1 inclusive
   */
  static isValidConfidence(confidence: number): boolean {
    return (
      typeof confidence === 'number' &&
      !isNaN(confidence) &&
      confidence >= 0 &&
      confidence <= 1
    );
  }

  /**
   * Get the initial processing status.
   *
   * @returns The string `'pending'`
   */
  static getInitialStatus(): string {
    return 'pending';
  }

  /**
   * Run the OCR provider for an image, applying a per-attempt timeout and
   * retrying failed attempts with a linearly growing delay.
   *
   * @param imageId - Image ID passed through to the provider
   * @param provider - Function performing the OCR call for the given ID
   * @param options - Optional timeout (ms) and retry count overrides
   * @returns The provider's text and confidence plus the document-creation
   *   decision derived from the confidence
   * @throws The provider's error immediately when its message is exactly
   *   `'invalid image format'` or contains `'Invalid'` (treated as
   *   non-retryable)
   * @throws The last failure (provider error or `Error('OCR timeout')`) once
   *   all attempts are used up, or `Error('OCR processing failed')` when no
   *   usable failure value was captured
   */
  static async process(
    imageId: string,
    provider: (id: string) => Promise<{ text: string; confidence: number }>,
    options: OCRProviderOptions = {}
  ): Promise<OCRResult> {
    const timeout = options.timeout ?? this.DEFAULT_TIMEOUT;
    const retries = options.retries ?? this.DEFAULT_RETRIES;

    // Kept as `unknown`: a rejected promise can carry any value, not just Error.
    let lastError: unknown = null;

    for (let attempt = 0; attempt <= retries; attempt++) {
      try {
        const result = await this.withTimeout(provider(imageId), timeout);

        return {
          text: result.text,
          confidence: result.confidence,
          shouldCreateDocument: this.shouldCreateDocument(result.confidence),
        };
      } catch (error: unknown) {
        lastError = error;

        // Don't retry on errors that another attempt cannot fix.
        if (
          error instanceof Error &&
          (error.message === 'invalid image format' ||
            error.message.includes('Invalid'))
        ) {
          throw error;
        }

        // Back off before the next attempt: 1s, 2s, 3s, ...
        if (attempt < retries) {
          await new Promise<void>((resolve) =>
            setTimeout(resolve, 1000 * (attempt + 1))
          );
        }
      }
    }

    throw lastError || new Error('OCR processing failed');
  }

  /**
   * Race `work` against a timer that rejects with `Error('OCR timeout')`.
   * The timer is always cleared once the race settles so that a finished
   * attempt does not leave a pending timeout keeping the event loop alive.
   */
  private static async withTimeout<T>(work: Promise<T>, ms: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const deadline = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('OCR timeout')), ms);
    });

    try {
      return await Promise.race([work, deadline]);
    } finally {
      clearTimeout(timer);
    }
  }
}