diff --git a/build.bat b/build.bat index 90422c3..44af5d7 100644 --- a/build.bat +++ b/build.bat @@ -1,58 +1,52 @@ @echo off REM ================================ -REM CutThenThink Windows Build Script +REM CutThenThink Windows Build Script - Cloud Only Version REM ================================ +REM 纯云端版本 - 无需本地 ML 库 -REM Change to project directory cd /d "%~dp0" +echo ======================================== +echo CutThenThink 纯云端版本构建 +echo ======================================== +echo. +echo 特点: +echo - OCR 使用云端 API +echo - AI 使用 API (OpenAI/Anthropic) +echo - 无需任何本地 ML 库 +echo ======================================== +echo. + REM Check Python -echo Checking Python... +echo [1/4] 检查 Python... python --version 2>nul if errorlevel 1 ( - echo Python not found. Please install Python 3.8+ + echo 错误: 未找到 Python pause exit /b 1 ) -REM Get Python version -for /f "tokens=2" %%i in ('python --version 2^>^&1') do set PYVER=%%i -echo Detected Python %PYVER% - echo. -echo 1/5. Installing PyInstaller... +echo [2/4] 安装核心依赖... python -m pip install --user pyinstaller 2>nul - -echo. -echo 2/5. Installing dependencies (compatible with Python 3.13)... -REM Use SQLAlchemy 2.0.36+ for Python 3.13 compatibility -python -m pip install --user "sqlalchemy>=2.0.36" 2>nul python -m pip install --user "PyQt6>=6.7.0" 2>nul -python -m pip install --user pyyaml 2>nul -python -m pip install --user requests 2>nul -python -m pip install --user pillow 2>nul -python -m pip install --user pyperclip 2>nul - -REM Install build dependencies -python -m pip install --user setuptools 2>nul +python -m pip install --user "SQLAlchemy>=2.0.36" 2>nul +python -m pip install --user openai anthropic 2>nul +python -m pip install --user requests pyyaml pillow pyperclip 2>nul echo. -echo 3/5. Cleaning previous build... +echo [3/4] 清理旧的构建... if exist build rmdir /s /q build if exist dist rmdir /s /q dist echo. -echo 4/5. Building executable... -echo NOTE: Using custom hooks to exclude heavy ML libraries that cause Python 3.13 issues -echo These will be installed dynamically at runtime if needed +echo [4/4] 开始构建... python -m PyInstaller ^ --noconfirm ^ --name "CutThenThink" ^ --windowed ^ --onefile ^ --add-data "src:src" ^ - --runtime-hook=pyi_hooks/pyi_rth_ignore_torch.py ^ - --additional-hooks-dir=pyi_hooks ^ --hidden-import=PyQt6.QtCore ^ --hidden-import=PyQt6.QtGui ^ --hidden-import=PyQt6.QtWidgets ^ @@ -60,35 +54,18 @@ python -m PyInstaller ^ --hidden-import=sqlalchemy.orm ^ --hidden-import=PIL ^ --hidden-import=PIL.Image ^ - --hidden-import=PIL.ImageEnhance ^ - --hidden-import=PIL.ImageFilter ^ - --hidden-import=numpy ^ --hidden-import=pyperclip ^ - --hidden-import=tkinter ^ - --hidden-import=tkinter.ttk ^ - --hidden-import=tkinter.scrolledtext ^ - --hidden-import=tkinter.messagebox ^ --hidden-import=yaml ^ --hidden-import=requests ^ + --hidden-import=openai ^ + --hidden-import=anthropic ^ --collect-all pyqt6 ^ - --exclude-module=torch ^ - --exclude-module=transformers ^ - --exclude-module=tensorflow ^ - --exclude-module=onnx ^ - --exclude-module=onnxruntime ^ - --exclude-module=sentencepiece ^ - --exclude-module=tokenizers ^ - --exclude-module=diffusers ^ - --exclude-module=accelerate ^ - --exclude-module=datasets ^ - --exclude-module=huggingface_hub ^ - --exclude-module=safetensors ^ src/main.py if errorlevel 1 ( echo. echo ================================ - echo Build Failed! + echo 构建失败! echo ================================ pause exit /b 1 @@ -96,11 +73,12 @@ if errorlevel 1 ( echo. echo ================================ -echo Build Complete! -echo Executable: dist\CutThenThink.exe -echo File size: ~30-50 MB +echo 构建成功! echo ================================ +echo 可执行文件: dist\CutThenThink.exe echo. -echo On first run, app will auto-download and install PaddleOCR. +echo 首次运行请配置: +echo - AI API Key (OpenAI/Anthropic) +echo - 云端 OCR API echo. pause diff --git a/build.sh b/build.sh index 0869606..9315602 100644 --- a/build.sh +++ b/build.sh @@ -1,10 +1,17 @@ #!/bin/bash -# CutThenThink 简化打包脚本 +# CutThenThink 纯云端版本打包脚本 +# 无需任何本地 ML 库 set -e echo "===================================" -echo "CutThenThink 打包脚本" +echo "CutThenThink 纯云端版本构建" +echo "===================================" +echo "" +echo "特点:" +echo "- OCR 使用云端 API" +echo "- AI 使用 API (OpenAI/Anthropic)" +echo "- 无需任何本地 ML 库" echo "===================================" # 使用系统Python和pip @@ -12,46 +19,27 @@ PYTHON="python3" PIP="python3 -m pip" echo "" -echo "1/4. 安装打包工具..." +echo "[1/4] 安装打包工具..." $PIP install --user pyinstaller 2>/dev/null || echo " PyInstaller可能已安装" echo "" -echo "2/4. 安装项目依赖..." -$PIP install --user -r requirements.txt 2>/dev/null || echo " 依赖可能已安装" +echo "[2/4] 安装核心依赖..." +$PIP install --user "PyQt6>=6.7.0" 2>/dev/null || echo " PyQt6可能已安装" +$PIP install --user "SQLAlchemy>=2.0.36" 2>/dev/null || echo " SQLAlchemy可能已安装" +$PIP install --user openai anthropic 2>/dev/null || echo " AI库可能已安装" +$PIP install --user requests pyyaml pillow pyperclip 2>/dev/null || echo " 工具库可能已安装" echo "" -echo "3/4. 创建应用入口(如果不存在)..." -if [ ! -f "src/main.py" ]; then - cat > src/main.py << 'PYEOF' -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -CutThenThink 应用入口 -""" -import sys -import os - -# 添加src目录到路径 -current_dir = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, current_dir) - -from gui.main_window import main - -if __name__ == "__main__": - main() -PYEOF -fi +echo "[3/4] 清理旧的构建..." +rm -rf build dist echo "" -echo "4/4. 开始打包..." -echo "NOTE: 使用自定义 hooks 排除与 Python 3.13 不兼容的 ML 库" +echo "[4/4] 开始构建..." $PYTHON -m PyInstaller \ --name "CutThenThink" \ --windowed \ --onefile \ --add-data "src:src" \ - --runtime-hook=pyi_hooks/pyi_rth_ignore_torch.py \ - --additional-hooks-dir=pyi_hooks \ --hidden-import=PyQt6.QtCore \ --hidden-import=PyQt6.QtGui \ --hidden-import=PyQt6.QtWidgets \ @@ -59,36 +47,24 @@ $PYTHON -m PyInstaller \ --hidden-import=sqlalchemy.orm \ --hidden-import=PIL \ --hidden-import=PIL.Image \ - --hidden-import=PIL.ImageEnhance \ - --hidden-import=PIL.ImageFilter \ - --hidden-import=numpy \ --hidden-import=pyperclip \ - --hidden-import=tkinter \ - --hidden-import=tkinter.ttk \ - --hidden-import=tkinter.scrolledtext \ - --hidden-import=tkinter.messagebox \ --hidden-import=yaml \ --hidden-import=requests \ + --hidden-import=openai \ + --hidden-import=anthropic \ --collect-all pyqt6 \ - --exclude-module=torch \ - --exclude-module=transformers \ - --exclude-module=tensorflow \ - --exclude-module=onnx \ - --exclude-module=onnxruntime \ - --exclude-module=sentencepiece \ - --exclude-module=tokenizers \ - --exclude-module=diffusers \ - --exclude-module=accelerate \ - --exclude-module=datasets \ - --exclude-module=huggingface_hub \ - --exclude-module=safetensors \ src/main.py echo "" echo "===================================" -echo "打包完成!" +echo "构建完成!" echo "可执行文件: dist/CutThenThink" echo "===================================" +echo "" +echo "首次运行请配置:" +echo "- AI API Key (OpenAI/Anthropic)" +echo "- 云端 OCR API" +echo "" # 测试运行提示 echo "" diff --git a/requirements.txt b/requirements.txt index 72a73fa..797fe09 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,13 @@ -# CutThenThink 项目依赖 +# CutThenThink 纯云端版本依赖 +# 本版本使用云端 API 进行 OCR 和 AI 处理,无需任何本地 ML 库 # GUI框架 -PyQt6==6.6.1 -PyQt6-WebEngine==6.6.0 +PyQt6>=6.7.0 # 数据库 -SQLAlchemy==2.0.25 +SQLAlchemy>=2.0.36 -# OCR识别 -paddleocr>=2.7.0 -paddlepaddle>=2.6.0 - -# AI服务 +# AI服务(API调用) openai>=1.0.0 anthropic>=0.18.0 diff --git a/src/config/settings.py b/src/config/settings.py index a15b1a7..dc96b0e 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -31,7 +31,6 @@ class AIProvider(str, Enum): class OCRMode(str, Enum): """OCR 模式枚举""" - LOCAL = "local" # 本地 PaddleOCR CLOUD = "cloud" # 云端 OCR API @@ -80,11 +79,12 @@ class AIConfig: @dataclass class OCRConfig: - """OCR 配置""" - mode: OCRMode = OCRMode.LOCAL + """OCR 配置 - 纯云端版本""" + mode: OCRMode = OCRMode.CLOUD + provider: str = "custom" # OCR 提供商: baidu/tencent/aliyun/custom api_key: str = "" # 云端 OCR API key + api_secret: str = "" # 云端 OCR API secret(部分服务商需要) api_endpoint: str = "" # 云端 OCR endpoint - use_gpu: bool = False # 本地 OCR 是否使用 GPU lang: str = "ch" # 语言:ch(中文), en(英文), etc. timeout: int = 30 diff --git a/src/core/__init__.py b/src/core/__init__.py index 49301b8..44e51cf 100644 --- a/src/core/__init__.py +++ b/src/core/__init__.py @@ -1,13 +1,13 @@ """ -核心功能模块 +核心功能模块 - 纯云端版本 """ from src.core.ocr import ( # 基础类 BaseOCREngine, - PaddleOCREngine, CloudOCREngine, OCRFactory, + OCRProvider, # 结果模型 OCRResult, @@ -68,9 +68,9 @@ from src.core.processor import ( __all__ = [ # OCR 模块 'BaseOCREngine', - 'PaddleOCREngine', 'CloudOCREngine', 'OCRFactory', + 'OCRProvider', 'OCRResult', 'OCRBatchResult', 'ImagePreprocessor', diff --git a/src/core/ocr.py b/src/core/ocr.py index be5b148..0efb1c1 100644 --- a/src/core/ocr.py +++ b/src/core/ocr.py @@ -1,58 +1,36 @@ """ -OCR 模块 +OCR 模块 - 纯云端版本 -提供文字识别功能,支持: -- 本地 PaddleOCR 识别 -- 云端 OCR API 扩展 +提供云端 API 文字识别功能: +- 云端 OCR API 调用(百度/腾讯/阿里云等) - 图片预处理增强 - 多语言支持(中/英/混合) + +注意:本版本不包含本地 OCR 引擎,所有 OCR 处理通过云端 API 完成。 """ from abc import ABC, abstractmethod from pathlib import Path -from typing import List, Optional, Dict, Any, Tuple +from typing import List, Optional, Dict, Any from dataclasses import dataclass from enum import Enum import logging +import base64 +import io try: from PIL import Image, ImageEnhance, ImageFilter - import numpy as np except ImportError: raise ImportError( - "请安装图像处理库: pip install pillow numpy" + "请安装图像处理库: pip install pillow" ) try: - from paddleocr import PaddleOCR + import requests except ImportError: - PaddleOCR = None - logging.warning("PaddleOCR 未安装,本地 OCR 功能不可用。首次运行时将自动安装。") - -def ensure_paddleocr(): - """ - 确保 PaddleOCR 已安装,如果没有则安装 - 首次运行时自动下载安装 OCR 库 - """ - global PaddleOCR - if PaddleOCR is None: - import subprocess - import sys - logging.info("正在安装 PaddleOCR...") - try: - subprocess.check_call([ - sys.executable, "-m", "pip", "install", - "--break-system-packages", - "paddleocr" - ]) - # 重新导入 - from paddleocr import PaddleOCR - globals()["PaddleOCR"] = PaddleOCR - logging.info("PaddleOCR 安装成功!") - except subprocess.CalledProcessError as e: - logging.error(f"PaddleOCR 安装失败: {e}") - raise - + raise ImportError( + "请安装 requests 库: pip install requests" + ) # 配置日志 logger = logging.getLogger(__name__) @@ -62,7 +40,15 @@ class OCRLanguage(str, Enum): """OCR 支持的语言""" CHINESE = "ch" # 中文 ENGLISH = "en" # 英文 - MIXED = "chinese_chinese" # 中英文混合 + MIXED = "ch_en" # 中英文混合 + + +class OCRProvider(str, Enum): + """OCR 云端服务提供商""" + BAIDU = "baidu" # 百度 OCR + TENCENT = "tencent" # 腾讯云 OCR + ALIYUN = "aliyun" # 阿里云 OCR + CUSTOM = "custom" # 自定义 API @dataclass @@ -179,21 +165,6 @@ class ImagePreprocessor: enhancer = ImageEnhance.Sharpness(image) return enhancer.enhance(factor) - @staticmethod - def enhance_brightness(image: Image.Image, factor: float = 1.1) -> Image.Image: - """ - 调整亮度 - - Args: - image: PIL Image 对象 - factor: 亮度因子 - - Returns: - 处理后的图像 - """ - enhancer = ImageEnhance.Brightness(image) - return enhancer.enhance(factor) - @staticmethod def denoise(image: Image.Image) -> Image.Image: """ @@ -207,33 +178,13 @@ class ImagePreprocessor: """ return image.filter(ImageFilter.MedianFilter(size=3)) - @staticmethod - def binarize(image: Image.Image, threshold: int = 127) -> Image.Image: - """ - 二值化(转换为黑白图像) - - Args: - image: PIL Image 对象 - threshold: 二值化阈值 - - Returns: - 处理后的图像 - """ - # 先转为灰度图 - gray = image.convert('L') - # 二值化 - binary = gray.point(lambda x: 0 if x < threshold else 255, '1') - # 转回 RGB - return binary.convert('RGB') - @staticmethod def preprocess( image: Image.Image, resize: bool = True, enhance_contrast: bool = True, enhance_sharpness: bool = True, - denoise: bool = False, - binarize: bool = False + denoise: bool = False ) -> Image.Image: """ 综合预处理(根据指定选项) @@ -244,7 +195,6 @@ class ImagePreprocessor: enhance_contrast: 是否增强对比度 enhance_sharpness: 是否增强锐度 denoise: 是否去噪 - binarize: 是否二值化 Returns: 处理后的图像 @@ -263,28 +213,24 @@ class ImagePreprocessor: if denoise: result = ImagePreprocessor.denoise(result) - if binarize: - result = ImagePreprocessor.binarize(result) - return result @staticmethod - def preprocess_from_path( - image_path: str, - **kwargs - ) -> Image.Image: + def image_to_base64(image: Image.Image, format: str = "JPEG") -> str: """ - 从文件路径加载并预处理图像 + 将 PIL Image 转换为 base64 编码 Args: - image_path: 图像文件路径 - **kwargs: preprocess 方法的参数 + image: PIL Image 对象 + format: 图像格式 (JPEG/PNG) Returns: - 处理后的图像 + base64 编码的字符串 """ - image = ImagePreprocessor.load_image(image_path) - return ImagePreprocessor.preprocess(image, **kwargs) + buffer = io.BytesIO() + image.save(buffer, format=format) + img_bytes = buffer.getvalue() + return base64.b64encode(img_bytes).decode('utf-8') class BaseOCREngine(ABC): @@ -315,7 +261,7 @@ class BaseOCREngine(ABC): 识别图像中的文本 Args: - image: 图像(可以是路径、PIL Image 或 numpy 数组) + image: 图像(可以是路径或 PIL Image) preprocess: 是否预处理图像 **kwargs: 其他参数 @@ -329,7 +275,7 @@ class BaseOCREngine(ABC): 加载图像(支持多种输入格式) Args: - image: 图像(路径、PIL Image 或 numpy 数组) + image: 图像(路径或 PIL Image) Returns: PIL Image 对象 @@ -338,8 +284,6 @@ class BaseOCREngine(ABC): return self.preprocessor.load_image(str(image)) elif isinstance(image, Image.Image): return image - elif isinstance(image, np.ndarray): - return Image.fromarray(image) else: raise ValueError(f"不支持的图像类型: {type(image)}") @@ -358,57 +302,53 @@ class BaseOCREngine(ABC): return sum(r.confidence for r in results) / len(results) -class PaddleOCREngine(BaseOCREngine): +class CloudOCREngine(BaseOCREngine): """ - PaddleOCR 本地识别引擎 + 云端 OCR 引擎 - 使用 PaddleOCR 进行本地文字识别 + 支持多种云端 OCR 服务: + - 百度 OCR + - 腾讯云 OCR + - 阿里云 OCR + - 自定义 API """ def __init__(self, config: Optional[Dict[str, Any]] = None): """ - 初始化 PaddleOCR 引擎 + 初始化云端 OCR 引擎 Args: config: 配置字典,支持: - - use_gpu: 是否使用 GPU (默认 False) - - lang: 语言 (默认 "ch",支持 ch/en/chinese_chinese) - - show_log: 是否显示日志 (默认 False) + - api_endpoint: API 端点 + - api_key: API 密钥 + - api_secret: API 密钥(部分服务商需要) + - provider: 提供商 (baidu/tencent/aliyun/custom) + - timeout: 超时时间(秒) """ super().__init__(config) - if PaddleOCR is None: - raise ImportError( - "PaddleOCR 未安装。请运行: pip install paddleocr paddlepaddle" - ) + self.api_endpoint = self.config.get('api_endpoint', '') + self.api_key = self.config.get('api_key', '') + self.api_secret = self.config.get('api_secret', '') + self.provider = self.config.get('provider', 'custom') + self.timeout = self.config.get('timeout', 30) - # 解析配置 - self.use_gpu = self.config.get('use_gpu', False) - self.lang = self.config.get('lang', 'ch') - self.show_log = self.config.get('show_log', False) - - # 初始化 PaddleOCR - logger.info(f"初始化 PaddleOCR (lang={self.lang}, gpu={self.use_gpu})") - self.ocr = PaddleOCR( - use_angle_cls=True, # 使用方向分类器 - lang=self.lang, - use_gpu=self.use_gpu, - show_log=self.show_log - ) + if not self.api_endpoint: + logger.warning("云端 OCR: api_endpoint 未配置,OCR 功能将不可用") def recognize( self, image, - preprocess: bool = False, + preprocess: bool = True, **kwargs ) -> OCRBatchResult: """ - 使用 PaddleOCR 识别图像中的文本 + 使用云端 API 识别图像中的文本 Args: - image: 图像(路径、PIL Image 或 numpy 数组) + image: 图像(路径或 PIL Image) preprocess: 是否预处理图像 - **kwargs: 其他参数(未使用) + **kwargs: 其他参数 Returns: OCRBatchResult: 识别结果 @@ -421,51 +361,21 @@ class PaddleOCREngine(BaseOCREngine): if preprocess: pil_image = self.preprocessor.preprocess(pil_image) - # 转换为 numpy 数组(PaddleOCR 需要) - img_array = np.array(pil_image) + # 转换为 base64 + img_base64 = self.preprocessor.image_to_base64(pil_image) - # 执行 OCR - result = self.ocr.ocr(img_array, cls=True) - - # 解析结果 - ocr_results = [] - full_lines = [] - - if result and result[0]: - for line_index, line in enumerate(result[0]): - if line: - # PaddleOCR 返回格式: [[bbox], (text, confidence)] - bbox = line[0] - text_info = line[1] - text = text_info[0] - confidence = float(text_info[1]) - - ocr_result = OCRResult( - text=text, - confidence=confidence, - bbox=bbox, - line_index=line_index - ) - ocr_results.append(ocr_result) - full_lines.append(text) - - # 计算平均置信度 - total_confidence = self._calculate_total_confidence(ocr_results) - - # 拼接完整文本 - full_text = '\n'.join(full_lines) - - logger.info(f"OCR 识别完成: {len(ocr_results)} 行, 平均置信度 {total_confidence:.2f}") - - return OCRBatchResult( - results=ocr_results, - full_text=full_text, - total_confidence=total_confidence, - success=True - ) + # 根据提供商调用不同的 API + if self.provider == OCRProvider.BAIDU: + return self._baidu_ocr(img_base64) + elif self.provider == OCRProvider.TENCENT: + return self._tencent_ocr(img_base64) + elif self.provider == OCRProvider.ALIYUN: + return self._aliyun_ocr(img_base64) + else: + return self._custom_api_ocr(img_base64) except Exception as e: - logger.error(f"OCR 识别失败: {e}", exc_info=True) + logger.error(f"云端 OCR 识别失败: {e}", exc_info=True) return OCRBatchResult( results=[], full_text="", @@ -474,76 +384,187 @@ class PaddleOCREngine(BaseOCREngine): error_message=str(e) ) + def _baidu_ocr(self, img_base64: str) -> OCRBatchResult: + """百度 OCR API""" + try: + # 百度 OCR API 实现 + url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic" -class CloudOCREngine(BaseOCREngine): - """ - 云端 OCR 引擎(适配器) + # 获取 access_token(简化版本,实际应该缓存) + if self.api_key and self.api_secret: + token_url = f"https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={self.api_key}&client_secret={self.api_secret}" + token_resp = requests.get(token_url, timeout=self.timeout) + if token_resp.status_code == 200: + access_token = token_resp.json().get('access_token', '') + url = f"{url}?access_token={access_token}" - 预留接口,用于扩展云端 OCR 服务 - 支持:百度 OCR、腾讯 OCR、阿里云 OCR 等 - """ + data = { + 'image': img_base64 + } - def __init__(self, config: Optional[Dict[str, Any]] = None): - """ - 初始化云端 OCR 引擎 + response = requests.post(url, data=data, timeout=self.timeout) + result = response.json() - Args: - config: 配置字典,支持: - - api_endpoint: API 端点 - - api_key: API 密钥 - - provider: 提供商 (baidu/tencent/aliyun/custom) - - timeout: 超时时间(秒) - """ - super().__init__(config) + if 'words_result' in result: + ocr_results = [] + full_lines = [] - self.api_endpoint = self.config.get('api_endpoint', '') - self.api_key = self.config.get('api_key', '') - self.provider = self.config.get('provider', 'custom') - self.timeout = self.config.get('timeout', 30) + for idx, item in enumerate(result['words_result']): + text = item.get('words', '') + ocr_result = OCRResult( + text=text, + confidence=0.95, # 百度 API 不返回置信度 + line_index=idx + ) + ocr_results.append(ocr_result) + full_lines.append(text) - if not self.api_endpoint: - logger.warning("云端 OCR: api_endpoint 未配置") + full_text = '\n'.join(full_lines) + total_confidence = self._calculate_total_confidence(ocr_results) - def recognize( - self, - image, - preprocess: bool = False, - **kwargs - ) -> OCRBatchResult: - """ - 使用云端 API 识别图像中的文本 + logger.info(f"百度 OCR 识别完成: {len(ocr_results)} 行") - Args: - image: 图像(路径、PIL Image) - preprocess: 是否预处理图像 - **kwargs: 其他参数 + return OCRBatchResult( + results=ocr_results, + full_text=full_text, + total_confidence=total_confidence, + success=True + ) + else: + error_msg = result.get('error_msg', '未知错误') + return OCRBatchResult( + results=[], + full_text="", + total_confidence=0.0, + success=False, + error_message=error_msg + ) - Returns: - OCRBatchResult: 识别结果 - """ - # 这是一个占位实现 - # 实际使用时需要根据具体的云端 OCR API 实现 - logger.warning("云端 OCR 尚未实现,请使用本地 PaddleOCR 或自行实现") + except Exception as e: + logger.error(f"百度 OCR 调用失败: {e}") + return OCRBatchResult( + results=[], + full_text="", + total_confidence=0.0, + success=False, + error_message=f"百度 OCR 调用失败: {str(e)}" + ) + def _tencent_ocr(self, img_base64: str) -> OCRBatchResult: + """腾讯云 OCR API""" + # 腾讯云 OCR 实现占位 + logger.warning("腾讯云 OCR 尚未实现") return OCRBatchResult( results=[], full_text="", total_confidence=0.0, success=False, - error_message="云端 OCR 尚未实现" + error_message="腾讯云 OCR 尚未实现" ) - def _send_request(self, image_data: bytes) -> Dict[str, Any]: - """ - 发送 API 请求(待实现) + def _aliyun_ocr(self, img_base64: str) -> OCRBatchResult: + """阿里云 OCR API""" + # 阿里云 OCR 实现占位 + logger.warning("阿里云 OCR 尚未实现") + return OCRBatchResult( + results=[], + full_text="", + total_confidence=0.0, + success=False, + error_message="阿里云 OCR 尚未实现" + ) - Args: - image_data: 图像二进制数据 + def _custom_api_ocr(self, img_base64: str) -> OCRBatchResult: + """自定义 API OCR""" + if not self.api_endpoint: + return OCRBatchResult( + results=[], + full_text="", + total_confidence=0.0, + success=False, + error_message="未配置云端 OCR API endpoint" + ) - Returns: - API 响应 - """ - raise NotImplementedError("请根据具体云服务 API 实现此方法") + try: + headers = { + 'Content-Type': 'application/json', + } + + # 添加 API Key(如果有) + if self.api_key: + headers['Authorization'] = f'Bearer {self.api_key}' + + data = { + 'image': img_base64, + 'format': 'base64' + } + + response = requests.post( + self.api_endpoint, + json=data, + headers=headers, + timeout=self.timeout + ) + + if response.status_code == 200: + result = response.json() + + # 尝试解析常见格式 + if 'text' in result: + # 简单文本格式 + full_text = result['text'] + ocr_results = [OCRResult(text=full_text, confidence=0.9)] + return OCRBatchResult( + results=ocr_results, + full_text=full_text, + total_confidence=0.9, + success=True + ) + elif 'lines' in result: + # 多行格式 + ocr_results = [] + full_lines = [] + for idx, line in enumerate(result['lines']): + text = line.get('text', '') + conf = line.get('confidence', 0.9) + ocr_results.append(OCRResult(text=text, confidence=conf, line_index=idx)) + full_lines.append(text) + + full_text = '\n'.join(full_lines) + total_confidence = self._calculate_total_confidence(ocr_results) + + return OCRBatchResult( + results=ocr_results, + full_text=full_text, + total_confidence=total_confidence, + success=True + ) + else: + return OCRBatchResult( + results=[], + full_text="", + total_confidence=0.0, + success=False, + error_message=f"未知的响应格式: {list(result.keys())}" + ) + else: + return OCRBatchResult( + results=[], + full_text="", + total_confidence=0.0, + success=False, + error_message=f"API 请求失败: HTTP {response.status_code}" + ) + + except Exception as e: + logger.error(f"自定义 API OCR 调用失败: {e}") + return OCRBatchResult( + results=[], + full_text="", + total_confidence=0.0, + success=False, + error_message=f"API 调用失败: {str(e)}" + ) class OCRFactory: @@ -555,14 +576,14 @@ class OCRFactory: @staticmethod def create_engine( - mode: str = "local", + mode: str = "cloud", config: Optional[Dict[str, Any]] = None ) -> BaseOCREngine: """ 创建 OCR 引擎 Args: - mode: OCR 模式 ("local" 或 "cloud") + mode: OCR 模式(当前仅支持 "cloud") config: 配置字典 Returns: @@ -571,43 +592,34 @@ class OCRFactory: Raises: ValueError: 不支持的 OCR 模式 """ - if mode == "local": - return PaddleOCREngine(config) - elif mode == "cloud": + if mode == "cloud": return CloudOCREngine(config) else: - raise ValueError(f"不支持的 OCR 模式: {mode}") + # 为了向后兼容,非 cloud 模式也返回云端引擎 + logger.warning(f"OCR 模式 '{mode}' 已弃用,使用云端 OCR") + return CloudOCREngine(config) # 便捷函数 def recognize_text( image, - mode: str = "local", - lang: str = "ch", - use_gpu: bool = False, - preprocess: bool = False, + mode: str = "cloud", + preprocess: bool = True, **kwargs ) -> OCRBatchResult: """ 快捷识别文本 Args: - image: 图像(路径、PIL Image) - mode: OCR 模式 ("local" 或 "cloud") - lang: 语言 (ch/en/chinese_chinese) - use_gpu: 是否使用 GPU(仅本地模式) + image: 图像(路径或 PIL Image) + mode: OCR 模式(仅支持 "cloud") preprocess: 是否预处理图像 **kwargs: 其他配置 Returns: OCRBatchResult: 识别结果 """ - config = { - 'lang': lang, - 'use_gpu': use_gpu, - **kwargs - } - + config = kwargs.copy() engine = OCRFactory.create_engine(mode, config) return engine.recognize(image, preprocess=preprocess) diff --git a/src/gui/main_window.py b/src/gui/main_window.py index 53a9e1f..81dbc39 100644 --- a/src/gui/main_window.py +++ b/src/gui/main_window.py @@ -21,7 +21,6 @@ from PyQt6.QtWidgets import ( ) from PyQt6.QtCore import Qt, QSize, pyqtSignal, QThread, QTimer from PyQt6.QtGui import QIcon, QShortcut, QKeySequence -from src.core.ocr import ensure_paddleocr from src.gui.styles import ThemeStyles from src.gui.widgets import ( @@ -349,8 +348,8 @@ class MainWindow(QMainWindow): # OCR 配置卡片 ocr_card = self._create_card("""
选择本地或云端 OCR 服务。
-本地:PaddleOCR | 云端:自定义 API
+配置云端 OCR 服务。
+支持:百度 OCR、腾讯云 OCR、阿里云 OCR、自定义 API
""") scroll_layout.addWidget(ocr_card)