/**
 * OCR Service
 * Handles OCR processing and confidence validation
 */

/** Result returned by {@link OCRService.process} after a successful provider call. */
export interface OCRResult {
  /** Text returned by the OCR provider. */
  text: string;
  /** Confidence score returned by the OCR provider. */
  confidence: number;
  /** Outcome of {@link OCRService.shouldCreateDocument} for `confidence` (default threshold). */
  shouldCreateDocument: boolean;
}

/** Tuning knobs for {@link OCRService.process}. All fields are optional. */
export interface OCRProviderOptions {
  /** Per-attempt timeout in milliseconds (default 10000). */
  timeout?: number;
  /** Number of additional attempts after the first one fails (default 2). */
  retries?: number;
  /**
   * Base delay in milliseconds between attempts (default 1000).
   * The wait before retry number k (1-based) is `retryDelay * k`.
   */
  retryDelay?: number;
}

export class OCRService {
  private static readonly DEFAULT_TIMEOUT = 10000; // 10 seconds
  private static readonly DEFAULT_RETRIES = 2;
  private static readonly DEFAULT_RETRY_DELAY = 1000; // 1 second

  /**
   * Determine if document should be created based on confidence
   * @param confidence - OCR confidence score (0-1)
   * @param threshold - Minimum threshold (default 0.3)
   * @returns True if `confidence` is a valid score and is at least `threshold`
   */
  static shouldCreateDocument(confidence: number, threshold: number = 0.3): boolean {
    // Reference the class explicitly so the method also works when detached
    // from the class (e.g. passed around as a callback).
    if (!OCRService.isValidConfidence(confidence)) {
      return false;
    }
    return confidence >= threshold;
  }

  /**
   * Validate confidence score is in valid range
   * @param confidence - Confidence score to validate
   * @returns True if `confidence` is a non-NaN number within [0, 1]
   */
  static isValidConfidence(confidence: number): boolean {
    return (
      typeof confidence === 'number' &&
      !Number.isNaN(confidence) &&
      confidence >= 0 &&
      confidence <= 1
    );
  }

  /**
   * Get initial processing status
   * @returns The initial status, `'pending'`
   */
  static getInitialStatus(): string {
    return 'pending';
  }

  /**
   * Process OCR with retry logic
   *
   * Calls `provider` with a per-attempt timeout. Failed attempts are retried
   * with a linearly growing delay, except for errors classified as
   * non-retryable (see {@link OCRService.isNonRetryable}), which are rethrown
   * immediately.
   *
   * @param imageId - Image ID to process
   * @param provider - OCR provider function
   * @param options - OCR options
   * @returns The provider's text and confidence plus the document-creation decision
   * @throws The non-retryable error as soon as it occurs, or the last error
   *   seen once all attempts are exhausted
   */
  static async process(
    imageId: string,
    provider: (id: string) => Promise<{ text: string; confidence: number }>,
    options: OCRProviderOptions = {}
  ): Promise<OCRResult> {
    const timeout = options.timeout ?? OCRService.DEFAULT_TIMEOUT;
    const requestedRetries = options.retries ?? OCRService.DEFAULT_RETRIES;
    // A negative or NaN retry count must still allow the initial attempt.
    const retries = Number.isNaN(requestedRetries) ? 0 : Math.max(0, requestedRetries);
    const retryDelay = options.retryDelay ?? OCRService.DEFAULT_RETRY_DELAY;

    let lastError: unknown;

    for (let attempt = 0; attempt <= retries; attempt++) {
      try {
        const result = await OCRService.withTimeout(provider(imageId), timeout);
        return {
          text: result.text,
          confidence: result.confidence,
          shouldCreateDocument: OCRService.shouldCreateDocument(result.confidence),
        };
      } catch (error) {
        lastError = error;

        // Don't retry on certain errors
        if (OCRService.isNonRetryable(error)) {
          throw error;
        }

        // Retry on transient errors, waiting longer after each failed attempt
        if (attempt < retries) {
          await OCRService.delay(retryDelay * (attempt + 1));
        }
      }
    }

    throw lastError || new Error('OCR processing failed');
  }

  /**
   * Race `work` against a timer that rejects with `'OCR timeout'`.
   * The timer is always cleared once the race settles so it cannot linger
   * after the call has finished.
   */
  private static async withTimeout<T>(work: Promise<T>, timeoutMs: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const expiry = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('OCR timeout')), timeoutMs);
    });

    try {
      return await Promise.race([work, expiry]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * True for errors that should not be retried: an `Error` whose message is
   * exactly `'invalid image format'` or contains `'Invalid'` (case-sensitive).
   */
  private static isNonRetryable(error: unknown): boolean {
    return (
      error instanceof Error &&
      (error.message === 'invalid image format' || error.message.includes('Invalid'))
    );
  }

  /** Resolve after `ms` milliseconds. */
  private static delay(ms: number): Promise<void> {
    return new Promise<void>((resolve) => {
      setTimeout(resolve, ms);
    });
  }
}