feat: 添加 Docker 部署支持和多 OCR 提供商架构

- 添加完整的 Docker 配置 (Dockerfile, docker-compose.yml) - 修复前端硬编码端口 4000，改用相对路径 /api - 实现多 OCR 提供商架构 (Tesseract.js/Baidu/RapidOCR) - 修复 Docker 环境中图片上传路径问题 - 添加用户设置页面和 AI 分析服务 - 更新 Prisma schema 支持 AI 分析结果 - 添加部署文档和 OCR 配置指南 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 18:20:46 +08:00
parent f8472987f0
commit 358deeb380
39 changed files with 3169 additions and 71 deletions
--- a/backend/src/services/ai.service.ts
+++ b/backend/src/services/ai.service.ts
@@ -0,0 +1,475 @@
+/**
+ * AI Service
+ * 支持 6 个主流 AI 服务商进行文档智能分析
+ * - GLM (智谱 AI)
+ * - MiniMax
+ * - DeepSeek
+ * - Kimi (月之暗面)
+ * - OpenAI
+ * - Anthropic (Claude)
+ */
+
+import { prisma } from '../lib/prisma';
+
+// Identifiers of the AI providers this service can call.
+export type AIProviderType = 'glm' | 'minimax' | 'deepseek' | 'kimi' | 'openai' | 'anthropic';
+
+// Result of one document analysis, as returned by AIService.analyzeDocument.
+export interface AIAnalysisResult {
+  suggested_tags: string[]; // tags proposed for the document
+  suggested_category?: string; // proposed category, when the reply contained one
+  summary?: string; // short summary, when the reply contained one
+  raw_response: string; // provider reply text before parsing
+  provider: AIProviderType; // provider that produced the reply
+  model: string; // model name taken from that provider's configuration
+}
+
+// Connection settings for a single AI provider.
+export interface AIProviderConfig {
+  apiKey: string; // credential sent with each request; an empty string is treated as "not configured"
+  apiUrl: string; // endpoint the request is POSTed to
+  model: string; // model name placed in the request body
+}
+
+// Full AI configuration: one AIProviderConfig per provider plus the default choice.
+export interface AIConfig {
+  defaultProvider: AIProviderType; // used when AnalyzeOptions.provider is not given
+  glm: AIProviderConfig;
+  minimax: AIProviderConfig;
+  deepseek: AIProviderConfig;
+  kimi: AIProviderConfig;
+  openai: AIProviderConfig;
+  anthropic: AIProviderConfig;
+}
+
+// Optional per-call settings for AIService.analyzeDocument.
+export interface AnalyzeOptions {
+  provider?: AIProviderType; // overrides the configuration's defaultProvider
+  config?: AIConfig; // replaces the environment-derived configuration entirely
+  generateSummary?: boolean; // NOTE(review): not read by any code visible in this file
+  maxTags?: number; // upper bound on returned tags; 5 is used when omitted
+}
+
+// One chat message in the request sent to a provider.
+interface ChatMessage {
+  role: 'system' | 'user' | 'assistant'; // speaker of the message
+  content: string; // message text
+}
+
+interface ChatCompletionResponse { // response body shape expected from the OpenAI-compatible endpoints
+  choices: Array<{
+    message: {
+      content: string; // generated reply text
+    };
+  }>;
+  usage?: { // token counts; declared here but not read by the visible code
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
+interface AnthropicResponse { // response body shape expected from the Anthropic endpoint
+  content: Array<{
+    type: string; // content block kind
+    text: string; // text carried by the block
+  }>;
+  usage?: { // token counts; declared here but not read by the visible code
+    input_tokens: number;
+    output_tokens: number;
+  };
+}
+
+/**
+ * AI service.
+ *
+ * Stateless collection of static helpers that send a document's text to one of
+ * the configured AI providers, turn the reply into tags / category / summary,
+ * and store the outcome through Prisma (`aIAnalysis`).
+ */
+export class AIService {
+  /** Tag limit applied when `AnalyzeOptions.maxTags` is missing or unusable. */
+  private static readonly DEFAULT_MAX_TAGS = 5;
+
+  /** Separators accepted between tags in a free-text tag list. */
+  private static readonly TAG_SEPARATORS = /[,，、]/;
+
+  /**
+   * Matches one `label: value` line of the free-text fallback.
+   * Both the ASCII colon and the full-width colon are accepted after the label.
+   */
+  private static readonly FIELD_LINE =
+    /^(标签|tags|分类|category|摘要|summary)\s*[:：]\s*([\s\S]*)$/i;
+
+  /**
+   * Build the AI configuration from environment variables.
+   *
+   * `_userId` is accepted but not used: every value comes from `process.env`,
+   * with a built-in default URL and model per provider. `||` is used on purpose
+   * so that an environment variable set to the empty string also falls back.
+   */
+  private static getAIConfig(_userId: string): AIConfig {
+    return {
+      defaultProvider: 'glm',
+      glm: {
+        apiKey: process.env.GLM_API_KEY || '',
+        apiUrl: process.env.GLM_API_URL || 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
+        model: process.env.GLM_MODEL || 'glm-4-flash',
+      },
+      minimax: {
+        apiKey: process.env.MINIMAX_API_KEY || '',
+        apiUrl: process.env.MINIMAX_API_URL || 'https://api.minimax.chat/v1/chat/completions',
+        model: process.env.MINIMAX_MODEL || 'abab6.5s-chat',
+      },
+      deepseek: {
+        apiKey: process.env.DEEPSEEK_API_KEY || '',
+        apiUrl: process.env.DEEPSEEK_API_URL || 'https://api.deepseek.com/v1/chat/completions',
+        model: process.env.DEEPSEEK_MODEL || 'deepseek-chat',
+      },
+      kimi: {
+        apiKey: process.env.KIMI_API_KEY || '',
+        apiUrl: process.env.KIMI_API_URL || 'https://api.moonshot.cn/v1/chat/completions',
+        model: process.env.KIMI_MODEL || 'moonshot-v1-8k',
+      },
+      openai: {
+        apiKey: process.env.OPENAI_API_KEY || '',
+        apiUrl: process.env.OPENAI_API_URL || 'https://api.openai.com/v1/chat/completions',
+        model: process.env.OPENAI_MODEL || 'gpt-4o-mini',
+      },
+      anthropic: {
+        apiKey: process.env.ANTHROPIC_API_KEY || '',
+        apiUrl: process.env.ANTHROPIC_API_URL || 'https://api.anthropic.com/v1/messages',
+        model: process.env.ANTHROPIC_MODEL || 'claude-3-5-sonnet-20241022',
+      },
+    };
+  }
+
+  /**
+   * Load a document that belongs to the given user.
+   *
+   * @throws Error when no document matches both `documentId` and `userId`.
+   */
+  private static async findOwnedDocument(documentId: string, userId: string) {
+    const document = await prisma.document.findFirst({
+      where: {
+        id: documentId,
+        user_id: userId,
+      },
+    });
+
+    if (!document) {
+      throw new Error('文档不存在或无权访问');
+    }
+
+    return document;
+  }
+
+  /**
+   * Call an OpenAI-compatible chat endpoint (GLM, MiniMax, DeepSeek, Kimi, OpenAI).
+   *
+   * @returns The first choice's message text, or `''` when the body carries none.
+   * @throws Error when the HTTP status is not OK; the message includes the status and body text.
+   */
+  private static async callOpenAICompatibleAPI(
+    config: AIProviderConfig,
+    messages: ChatMessage[]
+  ): Promise<string> {
+    const response = await fetch(config.apiUrl, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'Authorization': `Bearer ${config.apiKey}`,
+      },
+      body: JSON.stringify({
+        model: config.model,
+        messages,
+        temperature: 0.3,
+        max_tokens: 1000,
+      }),
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      throw new Error(`API 请求失败 (${response.status}): ${errorText}`);
+    }
+
+    // The body is external input: tolerate a missing `choices` array instead of
+    // failing with a TypeError on `choices[0]`.
+    const data = (await response.json()) as Partial<ChatCompletionResponse> | null;
+    const content = data?.choices?.[0]?.message?.content;
+    return typeof content === 'string' ? content : '';
+  }
+
+  /**
+   * Call the Anthropic messages endpoint.
+   *
+   * The first `system` message is lifted into the top-level `system` field and
+   * the remaining messages are sent as the conversation.
+   *
+   * @returns The concatenated text of all `text` blocks in the reply, or `''` when there are none.
+   * @throws Error when the HTTP status is not OK; the message includes the status and body text.
+   */
+  private static async callAnthropicAPI(
+    config: AIProviderConfig,
+    messages: ChatMessage[]
+  ): Promise<string> {
+    const systemMessage = messages.find(m => m.role === 'system')?.content || '';
+    const conversation = messages.filter(m => m.role !== 'system');
+
+    const payload: Record<string, unknown> = {
+      model: config.model,
+      messages: conversation,
+      max_tokens: 1000,
+    };
+    // Only send `system` when there is one; the connectivity probe supplies none.
+    if (systemMessage) {
+      payload.system = systemMessage;
+    }
+
+    const response = await fetch(config.apiUrl, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        'x-api-key': config.apiKey,
+        'anthropic-version': '2023-06-01',
+      },
+      body: JSON.stringify(payload),
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      throw new Error(`Anthropic API 请求失败 (${response.status}): ${errorText}`);
+    }
+
+    // The body is external input: tolerate a missing `content` array and skip
+    // blocks that are not text.
+    const data = (await response.json()) as Partial<AnthropicResponse> | null;
+    const blocks = data?.content;
+    if (!Array.isArray(blocks)) {
+      return '';
+    }
+
+    return blocks
+      .filter(block => block?.type === 'text' && typeof block.text === 'string')
+      .map(block => block.text)
+      .join('');
+  }
+
+  /**
+   * Send the document text to the chosen provider and return the raw reply.
+   *
+   * `prompt` is appended to the system message; `content` goes into the user message.
+   *
+   * @throws Error when the provider is unknown, its API key is empty, or the request fails.
+   */
+  private static async callAIProvider(
+    provider: AIProviderType,
+    config: AIConfig,
+    content: string,
+    prompt: string
+  ): Promise<string> {
+    const providerConfig = config[provider];
+
+    // `provider` can originate from request data, so it may not be a real key at runtime.
+    if (!providerConfig) {
+      throw new Error(`不支持的 AI Provider: ${provider}`);
+    }
+
+    if (!providerConfig.apiKey) {
+      throw new Error(`${provider} API Key 未配置`);
+    }
+
+    const messages: ChatMessage[] = [
+      {
+        role: 'system',
+        content: `你是一个智能文档分析助手。${prompt}`,
+      },
+      {
+        role: 'user',
+        content: `请分析以下文档内容：\n\n${content}`,
+      },
+    ];
+
+    // Anthropic has its own request/response format; every other provider is OpenAI-compatible.
+    if (provider === 'anthropic') {
+      return this.callAnthropicAPI(providerConfig, messages);
+    }
+
+    return this.callOpenAICompatibleAPI(providerConfig, messages);
+  }
+
+  /**
+   * Coerce an untrusted `tags` value into a clean list of tag strings.
+   *
+   * Accepts an array (string and number items are kept, others dropped) or a
+   * single separator-delimited string. Entries are trimmed and empty ones removed.
+   * Any other input yields `[]`.
+   */
+  private static normalizeTags(value: unknown): string[] {
+    let candidates: unknown[] = [];
+    if (typeof value === 'string') {
+      candidates = value.split(this.TAG_SEPARATORS);
+    } else if (Array.isArray(value)) {
+      candidates = value;
+    }
+
+    const tags: string[] = [];
+    for (const item of candidates) {
+      if (typeof item !== 'string' && typeof item !== 'number') {
+        continue;
+      }
+      const tag = String(item).trim();
+      if (tag) {
+        tags.push(tag);
+      }
+    }
+    return tags;
+  }
+
+  /**
+   * Extract tags, category and summary from a provider reply.
+   *
+   * Strategy, in order:
+   * 1. Parse the span from the first `{` to the last `}` as JSON and read
+   *    `tags` / `category` / `summary`, validating each field's type.
+   * 2. Otherwise scan line by line for `标签|tags`, `分类|category`,
+   *    `摘要|summary` followed by a colon; a later line overrides an earlier one.
+   * 3. If step 2 found no tags, use up to five distinct runs of 2-4 CJK characters.
+   */
+  private static parseAIResponse(response: string): {
+    tags: string[];
+    category?: string;
+    summary?: string;
+  } {
+    const result = {
+      tags: [] as string[],
+      category: undefined as string | undefined,
+      summary: undefined as string | undefined,
+    };
+
+    try {
+      const jsonMatch = response.match(/\{[\s\S]*\}/);
+      if (jsonMatch) {
+        const parsed: unknown = JSON.parse(jsonMatch[0]);
+        if (parsed !== null && typeof parsed === 'object') {
+          // Model output is untrusted: never assume the fields have the requested types.
+          const fields = parsed as Record<string, unknown>;
+          result.tags = this.normalizeTags(fields.tags);
+          result.category = typeof fields.category === 'string' ? fields.category : undefined;
+          result.summary = typeof fields.summary === 'string' ? fields.summary : undefined;
+          return result;
+        }
+      }
+    } catch (_error) {
+      // Not valid JSON; fall through to the line-based parser.
+    }
+
+    for (const line of response.split('\n')) {
+      const match = this.FIELD_LINE.exec(line.trim());
+      if (!match) {
+        continue;
+      }
+
+      const [, rawLabel = '', rawValue = ''] = match;
+      const label = rawLabel.toLowerCase();
+      const value = rawValue.trim();
+
+      if (label === '标签' || label === 'tags') {
+        result.tags = this.normalizeTags(value);
+      } else if (label === '分类' || label === 'category') {
+        result.category = value;
+      } else {
+        result.summary = value;
+      }
+    }
+
+    // Last resort: treat short CJK word runs in the reply as keywords.
+    if (result.tags.length === 0) {
+      const words = response.match(/[\u4e00-\u9fa5]{2,4}/g) || [];
+      result.tags = [...new Set(words)].slice(0, 5);
+    }
+
+    return result;
+  }
+
+  /**
+   * Analyze a document and store the suggested tags, category and summary.
+   *
+   * The analysis row is upserted by `document_id`, so a repeated call replaces
+   * the earlier result for the same document.
+   *
+   * @param documentId Document to analyze.
+   * @param userId Owner that the document must belong to.
+   * @param options Optional provider / configuration / tag-limit overrides.
+   *   `maxTags` values below 1 (or non-numeric) fall back to 5; fractional
+   *   values are rounded down. `generateSummary` is currently not consulted.
+   * @returns The parsed suggestions together with the raw provider reply.
+   * @throws Error when the document is missing or not owned by `userId`, the
+   *   provider is unknown or has no API key, or the provider call / database write fails.
+   */
+  static async analyzeDocument(
+    documentId: string,
+    userId: string,
+    options: AnalyzeOptions = {}
+  ): Promise<AIAnalysisResult> {
+    const document = await this.findOwnedDocument(documentId, userId);
+
+    const config = options.config || this.getAIConfig(userId);
+    const provider = options.provider || config.defaultProvider;
+
+    const providerConfig = config[provider];
+    // `provider` can originate from request data, so it may not be a real key at runtime.
+    if (!providerConfig) {
+      throw new Error(`不支持的 AI Provider: ${provider}`);
+    }
+    if (!providerConfig.apiKey) {
+      throw new Error(`${provider} API Key 未配置，请在设置页面配置`);
+    }
+
+    // Instructions appended to the system message; they ask for a JSON-only reply.
+    const prompt = `请分析文档内容，返回以下信息（JSON 格式）：
+{
+  "tags": ["标签1", "标签2", "标签3"],  // 3-5个关键词标签
+  "category": "建议的分类名称",           // 可选
+  "summary": "一句话摘要"                // 可选
+}
+
+要求：
+1. 标签应该能体现文档的核心内容
+2. 分类应该简洁明了（如：工作、学习、生活、技术等）
+3. 摘要应该简洁概括文档要点
+4. 直接返回 JSON，不要有其他内容`;
+
+    try {
+      const rawResponse = await this.callAIProvider(
+        provider,
+        config,
+        document.content,
+        prompt
+      );
+
+      const parsed = this.parseAIResponse(rawResponse);
+
+      // A negative limit would make `slice` drop tags from the end, so only
+      // values of at least 1 are honoured.
+      const requestedMax = options.maxTags;
+      const maxTags =
+        typeof requestedMax === 'number' && requestedMax >= 1
+          ? Math.floor(requestedMax)
+          : this.DEFAULT_MAX_TAGS;
+      const suggestedTags = parsed.tags.slice(0, maxTags);
+
+      // Same column values for both branches of the upsert.
+      const fields = {
+        provider,
+        model: providerConfig.model,
+        suggested_tags: JSON.stringify(suggestedTags),
+        suggested_category: parsed.category,
+        summary: parsed.summary,
+        raw_response: rawResponse,
+      };
+
+      await prisma.aIAnalysis.upsert({
+        where: { document_id: documentId },
+        create: {
+          document_id: documentId,
+          ...fields,
+        },
+        update: fields,
+      });
+
+      return {
+        suggested_tags: suggestedTags,
+        suggested_category: parsed.category,
+        summary: parsed.summary,
+        raw_response: rawResponse,
+        provider,
+        model: providerConfig.model,
+      };
+    } catch (error) {
+      // Log for diagnosis, then let the caller decide how to respond.
+      console.error('[AI] 分析失败:', error);
+      throw error;
+    }
+  }
+
+  /**
+   * Fetch the stored analysis for a document the user owns.
+   *
+   * @returns The analysis row with `suggested_tags` decoded from its JSON
+   *   string, or `null` when the document has not been analyzed.
+   * @throws Error when the document is missing or not owned by `userId`.
+   */
+  static async getAnalysis(documentId: string, userId: string) {
+    await this.findOwnedDocument(documentId, userId);
+
+    const analysis = await prisma.aIAnalysis.findUnique({
+      where: { document_id: documentId },
+    });
+
+    if (!analysis) {
+      return null;
+    }
+
+    return {
+      ...analysis,
+      suggested_tags: JSON.parse(analysis.suggested_tags),
+    };
+  }
+
+  /**
+   * Delete the stored analysis for a document the user owns.
+   *
+   * NOTE(review): this uses `delete`, which presumably rejects when no analysis
+   * row exists for the document — confirm that callers expect that.
+   *
+   * @throws Error when the document is missing or not owned by `userId`.
+   */
+  static async deleteAnalysis(documentId: string, userId: string) {
+    await this.findOwnedDocument(documentId, userId);
+
+    await prisma.aIAnalysis.delete({
+      where: { document_id: documentId },
+    });
+  }
+
+  /**
+   * Probe a provider by sending a one-word chat message.
+   *
+   * Never rejects: every failure is reported through the returned object.
+   *
+   * @param provider Provider to probe.
+   * @param config Optional overrides. Fields given for `provider` are merged
+   *   over the environment-derived settings, so supplying only `apiKey`
+   *   keeps the default `apiUrl` and `model`.
+   * @returns `success: true` when the request completed with an OK status;
+   *   otherwise `success: false` with the reason in `error`.
+   */
+  static async testProvider(
+    provider: AIProviderType,
+    config?: Partial<AIConfig>
+  ): Promise<{
+    success: boolean;
+    message: string;
+    error?: string;
+  }> {
+    try {
+      const baseConfig = this.getAIConfig('');
+      // Merge per field rather than per provider, so a partial override cannot
+      // wipe out the endpoint URL or the model name.
+      const providerConfig: AIProviderConfig = {
+        ...baseConfig[provider],
+        ...config?.[provider],
+      };
+
+      if (!providerConfig.apiKey) {
+        return {
+          success: false,
+          message: 'API Key 未配置',
+          error: `${provider} API Key 未配置`,
+        };
+      }
+
+      const messages: ChatMessage[] = [
+        {
+          role: 'user',
+          content: 'Hi',
+        },
+      ];
+
+      if (provider === 'anthropic') {
+        await this.callAnthropicAPI(providerConfig, messages);
+      } else {
+        await this.callOpenAICompatibleAPI(providerConfig, messages);
+      }
+
+      return {
+        success: true,
+        message: `${provider} 连接成功`,
+      };
+    } catch (error) {
+      return {
+        success: false,
+        message: '连接失败',
+        error: error instanceof Error ? error.message : String(error),
+      };
+    }
+  }
+}