Files
cutThink_lite/src-ocr-plugin/main.go

186 lines
4.3 KiB
Go
Raw Normal View History

package main
import (
"encoding/json"
"flag"
"fmt"
"log"
"os"
"github.com/otiai10/gosseract/v2"
)
const (
// Version is the plugin's version string. printVersion prints it on its own
// line and printUsage embeds it in the usage banner.
Version = "1.0.0"
)
// main parses the command line and dispatches to the requested command.
//
// The command can be given either through the -command flag
// ("ocr-plugin -command recognize -image x.png") or as a positional argument
// ("ocr-plugin recognize -image x.png"), which is the form shown in the usage
// text. When -command is set it takes precedence. A missing or unknown
// command prints the usage text and exits with status 1.
func main() {
	// Command-line arguments.
	command := flag.String("command", "", "Command to execute (recognize, version)")
	imagePath := flag.String("image", "", "Path to image file")
	language := flag.String("lang", "eng+chi_sim", "OCR language (default: eng+chi_sim)")
	flag.Parse()

	// The flag package stops parsing at the first non-flag argument, so for
	// "ocr-plugin recognize -image x.png" nothing has been parsed yet. Treat
	// that first positional argument as the command and parse what follows it.
	if *command == "" && flag.NArg() > 0 {
		*command = flag.Arg(0)
		// flag.CommandLine uses ExitOnError, so Parse terminates the process
		// itself on malformed flags; the returned error carries no extra
		// information here.
		_ = flag.CommandLine.Parse(flag.Args()[1:])
	}

	if *command == "" {
		printUsage()
		os.Exit(1)
	}

	switch *command {
	case "version":
		printVersion()
	case "recognize":
		if *imagePath == "" {
			fmt.Fprintln(os.Stderr, "Error: image path is required for recognize command")
			os.Exit(1)
		}
		recognize(*imagePath, *language)
	default:
		fmt.Fprintf(os.Stderr, "Unknown command: %s\n", *command)
		printUsage()
		os.Exit(1)
	}
}
// printUsage writes the help text to stdout: a banner with the plugin
// version, the available commands, the -lang option and the list of
// supported language codes.
func printUsage() {
	helpLines := []string{
		"CutThenThink OCR Plugin v" + Version,
		"\nUsage:",
		" ocr-plugin version - Print version information",
		" ocr-plugin recognize -image <path> - Recognize text from image",
		"\nOptions:",
		" -lang <language> - OCR language (default: eng+chi_sim)",
		"\nSupported languages:",
		" eng - English",
		" chi_sim - Simplified Chinese",
		" chi_tra - Traditional Chinese",
		" jpn - Japanese",
		" kor - Korean",
		" (combine with + for multiple languages)",
	}
	for _, line := range helpLines {
		fmt.Println(line)
	}
}
// printVersion writes the bare version string, followed by a newline, to
// stdout.
func printVersion() {
	fmt.Fprintln(os.Stdout, Version)
}
// OCRBlock represents a single recognized text block together with its
// bounding box. It is serialized as one element of the "blocks" array in
// OCRResponse; the struct tags below define the JSON key names.
type OCRBlock struct {
// Text is the recognized text of this block.
Text string `json:"text"`
// Confidence is the recognition confidence reported for this block.
// NOTE(review): presumably on Tesseract's 0-100 scale (recognize uses 80.0
// as a default) — confirm.
Confidence float32 `json:"confidence"`
// BBoxX and BBoxY give the position of the bounding box.
// NOTE(review): presumably the top-left corner in image pixels — confirm.
BBoxX uint32 `json:"bbox_x"`
BBoxY uint32 `json:"bbox_y"`
// BBoxWidth and BBoxHeight give the size of the bounding box.
BBoxWidth uint32 `json:"bbox_width"`
BBoxHeight uint32 `json:"bbox_height"`
// BlockType labels the kind of block; recognize always sets it to "text".
BlockType string `json:"block_type"`
}
// OCRResponse represents the JSON response the plugin prints to stdout for
// the recognize command, for both successful and failed runs.
type OCRResponse struct {
// Success reports whether recognition completed.
Success bool `json:"success"`
// Error holds the failure message; it is a pointer so that a nil value is
// omitted from the JSON output.
Error *string `json:"error,omitempty"`
// Engine names the OCR engine used; omitted from the JSON when empty.
Engine string `json:"engine,omitempty"`
// Language is the language specification used; omitted from the JSON when
// empty.
Language string `json:"language,omitempty"`
// Blocks lists the recognized text blocks. The key has no omitempty, so it
// is always present in the output.
Blocks []OCRBlock `json:"blocks"`
}
func recognize(imagePath, language string) {
// Check if file exists
if _, err := os.Stat(imagePath); os.IsNotExist(err) {
response := OCRResponse{
Success: false,
Error: stringPtr("Image file not found: " + imagePath),
}
printJSON(response)
os.Exit(1)
}
// Create Tesseract client
client := gosseract.NewClient()
defer client.Close()
// Set language
client.SetLanguage(language)
// Set image
client.SetImage(imagePath)
// Get text
text, err := client.Text()
if err != nil {
response := OCRResponse{
Success: false,
Error: stringPtr("OCR failed: " + err.Error()),
}
printJSON(response)
os.Exit(1)
}
// Get detailed text boxes
boxes, err := client.GetBoundingBoxes(gosseract.RIL_TEXT_LINE)
if err != nil {
log.Printf("Warning: Failed to get bounding boxes: %v", err)
// Continue without bounding boxes
}
// Convert to our format
var blocks []OCRBlock
for i, box := range boxes {
if box.Box != nil {
blocks = append(blocks, OCRBlock{
Text: box.Word,
Confidence: float32(box.Confidence),
BBoxX: uint32(box.Box.Left),
BBoxY: uint32(box.Box.Top),
BBoxWidth: uint32(box.Box.Width),
BBoxHeight: uint32(box.Box.Height),
BlockType: "text",
})
} else {
// Fallback for lines without bounding boxes
blocks = append(blocks, OCRBlock{
Text: box.Word,
Confidence: float32(box.Confidence),
BBoxX: 0,
BBoxY: uint32(i * 20),
BBoxWidth: 100,
BBoxHeight: 20,
BlockType: "text",
})
}
}
// If no boxes found, create a single block with the full text
if len(blocks) == 0 && text != "" {
blocks = append(blocks, OCRBlock{
Text: text,
Confidence: 80.0, // Default confidence
BBoxX: 0,
BBoxY: 0,
BBoxWidth: 100,
BBoxHeight: 100,
BlockType: "text",
})
}
response := OCRResponse{
Success: true,
Engine: "tesseract",
Language: language,
Blocks: blocks,
}
printJSON(response)
}
// printJSON writes v to stdout as indented JSON followed by a newline. If
// encoding or writing fails, it logs the error and terminates the process
// via log.Fatalf.
func printJSON(v interface{}) {
	out := json.NewEncoder(os.Stdout)
	out.SetIndent("", " ")

	err := out.Encode(v)
	if err == nil {
		return
	}
	log.Fatalf("Failed to encode JSON: %v", err)
}
// stringPtr returns a pointer to a fresh copy of s. It is used to fill
// optional *string fields from string expressions.
func stringPtr(s string) *string {
	p := new(string)
	*p = s
	return p
}