package main

import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"os"

	"github.com/otiai10/gosseract/v2"
)

const (
	// Version is the plugin version printed by the "version" command and
	// shown in the usage banner.
	Version = "1.0.0"
)

// main parses the command-line flags and dispatches to the requested command.
//
// Flags:
//
//	-command  command to run: "recognize" or "version" (required)
//	-image    path of the image to recognize (required for "recognize")
//	-lang     OCR language specification (defaults to "eng+chi_sim")
//
// A missing or unknown command, or a missing image path, terminates the
// process with exit status 1.
func main() {
	// Command-line flags.
	command := flag.String("command", "", "Command to execute (recognize, version)")
	imagePath := flag.String("image", "", "Path to image file")
	language := flag.String("lang", "eng+chi_sim", "OCR language (default: eng+chi_sim)")
	flag.Parse()

	switch *command {
	case "":
		// No command given (this also covers running with no arguments at all).
		printUsage()
		os.Exit(1)
	case "version":
		printVersion()
	case "recognize":
		if *imagePath == "" {
			fmt.Fprintln(os.Stderr, "Error: image path is required for recognize command")
			os.Exit(1)
		}
		recognize(*imagePath, *language)
	default:
		fmt.Fprintf(os.Stderr, "Unknown command: %s\n", *command)
		printUsage()
		os.Exit(1)
	}
}

// printUsage writes the help text to standard output. The invocations shown
// use the -command flag because that is the only way main selects a command.
func printUsage() {
	fmt.Println("CutThenThink OCR Plugin v" + Version)
	fmt.Println("\nUsage:")
	fmt.Println(" ocr-plugin -command version - Print version information")
	fmt.Println(" ocr-plugin -command recognize -image <path> - Recognize text from image")
	fmt.Println("\nOptions:")
	fmt.Println(" -lang <languages> - OCR language (default: eng+chi_sim)")
	fmt.Println("\nSupported languages:")
	fmt.Println(" eng - English")
	fmt.Println(" chi_sim - Simplified Chinese")
	fmt.Println(" chi_tra - Traditional Chinese")
	fmt.Println(" jpn - Japanese")
	fmt.Println(" kor - Korean")
	fmt.Println(" (combine with + for multiple languages)")
}

// printVersion writes the bare version string to standard output.
func printVersion() {
	fmt.Println(Version)
}

// OCRBlock represents a single recognized text block together with its
// bounding box.
type OCRBlock struct {
	// Text is the recognized text of the block.
	Text string `json:"text"`
	// Confidence is the engine-reported confidence for the block.
	// NOTE(review): presumably on a 0-100 scale; confirm against the consumer.
	Confidence float32 `json:"confidence"`
	// BBoxX and BBoxY locate the box origin; BBoxWidth and BBoxHeight give
	// its size. NOTE(review): presumably image pixels measured from the
	// top-left corner; confirm.
	BBoxX      uint32 `json:"bbox_x"`
	BBoxY      uint32 `json:"bbox_y"`
	BBoxWidth  uint32 `json:"bbox_width"`
	BBoxHeight uint32 `json:"bbox_height"`
	// BlockType labels the kind of block; this file only ever sets "text".
	BlockType string `json:"block_type"`
}

// OCRResponse represents the JSON response emitted by the plugin.
type OCRResponse struct {
	// Success reports whether recognition completed.
	Success bool `json:"success"`
	// Error carries the failure message; it is omitted from the JSON when nil.
	Error *string `json:"error,omitempty"`
	// Engine names the OCR engine; omitted from the JSON when empty.
	Engine string `json:"engine,omitempty"`
	// Language is the language specification used; omitted when empty.
	Language string `json:"language,omitempty"`
	// Blocks holds the recognized blocks. The field has no omitempty, so it
	// is always present in the JSON output.
	Blocks []OCRBlock `json:"blocks"`
}
func recognize(imagePath, language string) { // Check if file exists if _, err := os.Stat(imagePath); os.IsNotExist(err) { response := OCRResponse{ Success: false, Error: stringPtr("Image file not found: " + imagePath), } printJSON(response) os.Exit(1) } // Create Tesseract client client := gosseract.NewClient() defer client.Close() // Set language client.SetLanguage(language) // Set image client.SetImage(imagePath) // Get text text, err := client.Text() if err != nil { response := OCRResponse{ Success: false, Error: stringPtr("OCR failed: " + err.Error()), } printJSON(response) os.Exit(1) } // Get detailed text boxes boxes, err := client.GetBoundingBoxes(gosseract.RIL_TEXT_LINE) if err != nil { log.Printf("Warning: Failed to get bounding boxes: %v", err) // Continue without bounding boxes } // Convert to our format var blocks []OCRBlock for i, box := range boxes { if box.Box != nil { blocks = append(blocks, OCRBlock{ Text: box.Word, Confidence: float32(box.Confidence), BBoxX: uint32(box.Box.Left), BBoxY: uint32(box.Box.Top), BBoxWidth: uint32(box.Box.Width), BBoxHeight: uint32(box.Box.Height), BlockType: "text", }) } else { // Fallback for lines without bounding boxes blocks = append(blocks, OCRBlock{ Text: box.Word, Confidence: float32(box.Confidence), BBoxX: 0, BBoxY: uint32(i * 20), BBoxWidth: 100, BBoxHeight: 20, BlockType: "text", }) } } // If no boxes found, create a single block with the full text if len(blocks) == 0 && text != "" { blocks = append(blocks, OCRBlock{ Text: text, Confidence: 80.0, // Default confidence BBoxX: 0, BBoxY: 0, BBoxWidth: 100, BBoxHeight: 100, BlockType: "text", }) } response := OCRResponse{ Success: true, Engine: "tesseract", Language: language, Blocks: blocks, } printJSON(response) } func printJSON(v interface{}) { encoder := json.NewEncoder(os.Stdout) encoder.SetIndent("", " ") if err := encoder.Encode(v); err != nil { log.Fatalf("Failed to encode JSON: %v", err) } } func stringPtr(s string) *string { return &s }