feat: 添加 Docker 部署支持和多 OCR 提供商架构

- 添加完整的 Docker 配置 (Dockerfile, docker-compose.yml)
- 修复前端硬编码端口 4000,改用相对路径 /api
- 实现多 OCR 提供商架构 (Tesseract.js/Baidu/RapidOCR)
- 修复 Docker 环境中图片上传路径问题
- 添加用户设置页面和 AI 分析服务
- 更新 Prisma schema 支持 AI 分析结果
- 添加部署文档和 OCR 配置指南

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
congsh
2026-02-26 18:20:46 +08:00
parent f8472987f0
commit 358deeb380
39 changed files with 3169 additions and 71 deletions

View File

@@ -0,0 +1,475 @@
/**
* AI Service
* 支持 6 个主流 AI 服务商进行文档智能分析
* - GLM (智谱 AI)
* - MiniMax
* - DeepSeek
* - Kimi (月之暗面)
* - OpenAI
* - Anthropic (Claude)
*/
import { prisma } from '../lib/prisma';
/**
 * Identifier of a supported AI provider.
 * Each value is also a key of `AIConfig` that holds that provider's settings.
 */
export type AIProviderType = 'glm' | 'minimax' | 'deepseek' | 'kimi' | 'openai' | 'anthropic';
/** Result of one document analysis, as returned by `AIService.analyzeDocument`. */
export interface AIAnalysisResult {
/** Tags extracted from the provider reply, truncated to the requested maximum. */
suggested_tags: string[];
/** Category extracted from the reply, when one was found. */
suggested_category?: string;
/** Summary extracted from the reply, when one was found. */
summary?: string;
/** Reply text as returned by the provider call, before any parsing. */
raw_response: string;
/** Provider that produced the reply. */
provider: AIProviderType;
/** Model name taken from that provider's configuration. */
model: string;
}
/** Connection settings for a single AI provider. */
export interface AIProviderConfig {
/**
 * Credential sent with every request: as `Authorization: Bearer <key>` for
 * OpenAI-compatible providers and as `x-api-key` for Anthropic.
 * An empty string is treated as "not configured".
 */
apiKey: string;
/** Full endpoint URL that requests are POSTed to. */
apiUrl: string;
/** Model name sent as `model` in the request body. */
model: string;
}
/** Settings for every supported provider, keyed by provider identifier. */
export interface AIConfig {
/** Provider used by `analyzeDocument` when the caller does not pick one. */
defaultProvider: AIProviderType;
glm: AIProviderConfig;
minimax: AIProviderConfig;
deepseek: AIProviderConfig;
kimi: AIProviderConfig;
openai: AIProviderConfig;
anthropic: AIProviderConfig;
}
/** Optional overrides accepted by `AIService.analyzeDocument`. */
export interface AnalyzeOptions {
/** Provider to use instead of the configuration's `defaultProvider`. */
provider?: AIProviderType;
/** Complete configuration to use instead of the environment-derived one. */
config?: AIConfig;
/** NOTE(review): no code visible in this file reads this flag — confirm intended use. */
generateSummary?: boolean;
/** Upper bound on the number of returned tags; 5 is used when not provided. */
maxTags?: number;
}
// Wire-format types for provider requests and responses.

/** One chat turn placed in the `messages` array of a provider request. */
interface ChatMessage {
role: 'system' | 'user' | 'assistant';
content: string;
}
/**
 * Subset of the response body of an OpenAI-compatible chat-completions endpoint.
 * This file reads only `choices[0].message.content`; `usage` is declared but
 * not read here.
 */
interface ChatCompletionResponse {
choices: Array<{
message: {
content: string;
};
}>;
usage?: {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
};
}
/**
 * Subset of the response body of the Anthropic messages endpoint.
 * The reply text is taken from `content`; `usage` is declared but not read in
 * this file.
 */
interface AnthropicResponse {
content: Array<{
type: string;
text: string;
}>;
usage?: {
input_tokens: number;
output_tokens: number;
};
}
/**
 * Static facade for AI-assisted document analysis.
 *
 * Loads a document through Prisma, sends its content to the selected provider
 * (the Anthropic messages endpoint, or an OpenAI-compatible chat-completions
 * endpoint for every other provider), extracts tags / category / summary from
 * the reply and stores the result through `prisma.aIAnalysis`.
 */
export class AIService {
  /** Number of tags kept when `AnalyzeOptions.maxTags` is absent or unusable. */
  private static readonly defaultMaxTags = 5;

  /** Matches `<label>: <value>` lines; accepts ASCII and full-width (U+FF1A) colons. */
  private static readonly labelledLine =
    /^(标签|tags|分类|category|摘要|summary)\s*[:\uFF1A]\s*(.*)$/i;

  /** Tag separators: ASCII comma, full-width comma (U+FF0C), ideographic comma (U+3001). */
  private static readonly tagSeparators = /[,\uFF0C\u3001]/;

  /** Instruction appended to the system message of every analysis request. */
  private static readonly analysisPrompt = [
    '请分析文档内容,返回以下信息(JSON 格式):',
    '{',
    '"tags": ["标签1", "标签2", "标签3"], // 3-5个关键词标签',
    '"category": "建议的分类名称", // 可选',
    '"summary": "一句话摘要" // 可选',
    '}',
    '要求:',
    '1. 标签应该能体现文档的核心内容',
    '2. 分类应该简洁明了(如:工作、学习、生活、技术等)',
    '3. 摘要应该简洁概括文档要点',
    '4. 直接返回 JSON,不要有其他内容',
  ].join('\n');

  /**
   * Builds the provider configuration from environment variables.
   *
   * `||` (not `??`) is deliberate: an environment variable that is set but
   * empty falls back to the built-in default as well.
   *
   * @param _userId Currently unused; the configuration is environment-wide.
   */
  private static getAIConfig(_userId: string): AIConfig {
    return {
      defaultProvider: 'glm',
      glm: {
        apiKey: process.env.GLM_API_KEY || '',
        apiUrl: process.env.GLM_API_URL || 'https://open.bigmodel.cn/api/paas/v4/chat/completions',
        model: process.env.GLM_MODEL || 'glm-4-flash',
      },
      minimax: {
        apiKey: process.env.MINIMAX_API_KEY || '',
        apiUrl: process.env.MINIMAX_API_URL || 'https://api.minimax.chat/v1/chat/completions',
        model: process.env.MINIMAX_MODEL || 'abab6.5s-chat',
      },
      deepseek: {
        apiKey: process.env.DEEPSEEK_API_KEY || '',
        apiUrl: process.env.DEEPSEEK_API_URL || 'https://api.deepseek.com/v1/chat/completions',
        model: process.env.DEEPSEEK_MODEL || 'deepseek-chat',
      },
      kimi: {
        apiKey: process.env.KIMI_API_KEY || '',
        apiUrl: process.env.KIMI_API_URL || 'https://api.moonshot.cn/v1/chat/completions',
        model: process.env.KIMI_MODEL || 'moonshot-v1-8k',
      },
      openai: {
        apiKey: process.env.OPENAI_API_KEY || '',
        apiUrl: process.env.OPENAI_API_URL || 'https://api.openai.com/v1/chat/completions',
        model: process.env.OPENAI_MODEL || 'gpt-4o-mini',
      },
      anthropic: {
        apiKey: process.env.ANTHROPIC_API_KEY || '',
        apiUrl: process.env.ANTHROPIC_API_URL || 'https://api.anthropic.com/v1/messages',
        model: process.env.ANTHROPIC_MODEL || 'claude-3-5-sonnet-20241022',
      },
    };
  }

  /**
   * POSTs a JSON payload and returns the decoded JSON response body.
   *
   * @param failureLabel Prefix of the error message used for non-2xx responses.
   * @throws Error carrying the HTTP status and response text on a non-2xx response.
   */
  private static async postJson(
    url: string,
    headers: Record<string, string>,
    payload: unknown,
    failureLabel: string
  ): Promise<unknown> {
    const response = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', ...headers },
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`${failureLabel} (${response.status}): ${errorText}`);
    }
    return response.json();
  }

  /**
   * Calls an OpenAI-compatible chat-completions endpoint
   * (GLM, MiniMax, DeepSeek, Kimi, OpenAI).
   *
   * @returns The first choice's message content, or `''` when the body does
   *          not contain one.
   * @throws Error on a non-2xx response.
   */
  private static async callOpenAICompatibleAPI(
    config: AIProviderConfig,
    messages: ChatMessage[]
  ): Promise<string> {
    const data = (await this.postJson(
      config.apiUrl,
      { Authorization: `Bearer ${config.apiKey}` },
      {
        model: config.model,
        messages,
        temperature: 0.3,
        max_tokens: 1000,
      },
      'API 请求失败'
    )) as Partial<ChatCompletionResponse> | null;

    // A 2xx body is not guaranteed to carry `choices`; never index into a
    // missing array.
    const reply = data?.choices?.[0]?.message?.content;
    return typeof reply === 'string' ? reply : '';
  }

  /**
   * Calls the Anthropic messages endpoint.
   *
   * The first `system` message is lifted into the top-level `system` field and
   * the remaining messages are sent as the conversation.
   *
   * @returns The text of the first `text` content block (falling back to the
   *          first block), or `''` when the body carries no text.
   * @throws Error on a non-2xx response.
   */
  private static async callAnthropicAPI(
    config: AIProviderConfig,
    messages: ChatMessage[]
  ): Promise<string> {
    const systemPrompt = messages.find(m => m.role === 'system')?.content || '';
    const conversation = messages.filter(m => m.role !== 'system');

    const data = (await this.postJson(
      config.apiUrl,
      {
        'x-api-key': config.apiKey,
        'anthropic-version': '2023-06-01',
      },
      {
        model: config.model,
        // Omit the field when there is no system message instead of sending
        // an empty string.
        ...(systemPrompt ? { system: systemPrompt } : {}),
        messages: conversation,
        max_tokens: 1000,
      },
      'Anthropic API 请求失败'
    )) as Partial<AnthropicResponse> | null;

    const blocks = data?.content;
    if (!Array.isArray(blocks)) {
      return '';
    }
    const textBlock =
      blocks.find(block => block?.type === 'text' && typeof block.text === 'string') ?? blocks[0];
    const reply = textBlock?.text;
    return typeof reply === 'string' ? reply : '';
  }

  /**
   * Sends the document content plus the analysis instructions to a provider.
   *
   * @throws Error when the provider has no API key or the request fails.
   */
  private static async callAIProvider(
    provider: AIProviderType,
    config: AIConfig,
    content: string,
    prompt: string
  ): Promise<string> {
    const providerConfig = config[provider];
    // `?.` because a runtime value may not honour the static type.
    if (!providerConfig?.apiKey) {
      throw new Error(`${provider} API Key 未配置`);
    }

    const messages: ChatMessage[] = [
      { role: 'system', content: `你是一个智能文档分析助手。${prompt}` },
      { role: 'user', content: `请分析以下文档内容:\n\n${content}` },
    ];

    // Anthropic uses its own request/response format; everything else is
    // OpenAI-compatible.
    return provider === 'anthropic'
      ? this.callAnthropicAPI(providerConfig, messages)
      : this.callOpenAICompatibleAPI(providerConfig, messages);
  }

  /**
   * Turns an untrusted `tags` value into a clean list: accepts an array or a
   * separator-delimited string, keeps only non-empty strings, trims them and
   * removes duplicates while preserving order.
   */
  private static normalizeTags(value: unknown): string[] {
    let candidates: unknown[] = [];
    if (Array.isArray(value)) {
      candidates = value;
    } else if (typeof value === 'string') {
      candidates = value.split(this.tagSeparators);
    }
    const cleaned = candidates
      .filter((tag): tag is string => typeof tag === 'string')
      .map(tag => tag.trim())
      .filter(tag => tag.length > 0);
    return [...new Set(cleaned)];
  }

  /** Returns the trimmed string, or `undefined` for non-strings and blank strings. */
  private static nonEmptyString(value: unknown): string | undefined {
    if (typeof value !== 'string') {
      return undefined;
    }
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  }

  /**
   * Tries to read the reply as JSON (first `{` through last `}`).
   *
   * @returns The normalised fields, or `null` when the reply holds no
   *          parseable JSON object so the caller can fall back to text parsing.
   */
  private static parseJsonReply(
    response: string
  ): { tags: string[]; category: string | undefined; summary: string | undefined } | null {
    const jsonMatch = response.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      return null;
    }
    let parsed: unknown;
    try {
      parsed = JSON.parse(jsonMatch[0]);
    } catch {
      // Not valid JSON (for example it contains comments) — use text parsing.
      return null;
    }
    if (typeof parsed !== 'object' || parsed === null) {
      return null;
    }
    const fields = parsed as Record<string, unknown>;
    return {
      tags: this.normalizeTags(fields['tags']),
      category: this.nonEmptyString(fields['category']),
      summary: this.nonEmptyString(fields['summary']),
    };
  }

  /**
   * Extracts structured data from a provider reply.
   *
   * Strategy, in order:
   * 1. a JSON object anywhere in the reply;
   * 2. labelled lines (`标签` / `tags`, `分类` / `category`, `摘要` / `summary`),
   *    where a later line overrides an earlier one with the same label;
   * 3. if there are still no tags, up to five distinct runs of 2-4 CJK
   *    characters taken from the reply.
   */
  private static parseAIResponse(response: string): {
    tags: string[];
    category?: string;
    summary?: string;
  } {
    const structured = this.parseJsonReply(response);
    if (structured) {
      return structured;
    }

    const result = {
      tags: [] as string[],
      category: undefined as string | undefined,
      summary: undefined as string | undefined,
    };

    for (const rawLine of response.split('\n')) {
      const match = this.labelledLine.exec(rawLine.trim());
      if (!match) {
        continue;
      }
      const label = (match[1] ?? '').toLowerCase();
      const value = (match[2] ?? '').trim();
      switch (label) {
        case '标签':
        case 'tags':
          result.tags = this.normalizeTags(value);
          break;
        case '分类':
        case 'category':
          result.category = value;
          break;
        case '摘要':
        case 'summary':
          result.summary = value;
          break;
      }
    }

    if (result.tags.length === 0) {
      const words = response.match(/[\u4e00-\u9fa5]{2,4}/g) || [];
      result.tags = [...new Set(words)].slice(0, 5);
    }
    return result;
  }

  /**
   * Resolves the tag limit: any number >= 1 is floored and used; everything
   * else (absent, 0, NaN, negative) yields the default. A negative limit must
   * not reach `slice`, where it would drop tags from the end instead.
   */
  private static resolveMaxTags(requested: number | undefined): number {
    return typeof requested === 'number' && requested >= 1
      ? Math.floor(requested)
      : this.defaultMaxTags;
  }

  /**
   * Loads a document that belongs to the given user.
   *
   * @throws Error when no document matches both the id and the user id.
   */
  private static async findOwnedDocument(documentId: string, userId: string) {
    const document = await prisma.document.findFirst({
      where: {
        id: documentId,
        user_id: userId,
      },
    });
    if (!document) {
      throw new Error('文档不存在或无权访问');
    }
    return document;
  }

  /**
   * Analyses a document and stores the suggested tags, category and summary.
   *
   * @param documentId Id of the document to analyse.
   * @param userId     Owner of the document; other users' documents are rejected.
   * @param options    Optional provider / configuration / tag-limit overrides.
   * @returns The parsed suggestions together with the raw provider reply.
   * @throws Error when the document is missing or not owned by `userId`, when
   *         the provider has no API key, or when the provider call or the
   *         database write fails (failures are logged, then rethrown).
   */
  static async analyzeDocument(
    documentId: string,
    userId: string,
    options: AnalyzeOptions = {}
  ): Promise<AIAnalysisResult> {
    const document = await this.findOwnedDocument(documentId, userId);

    const config = options.config || this.getAIConfig(userId);
    const provider = options.provider || config.defaultProvider;

    const providerConfig = config[provider];
    // `?.` because `provider` / `config` may come from unvalidated input.
    if (!providerConfig?.apiKey) {
      throw new Error(`${provider} API Key 未配置,请在设置页面配置`);
    }

    try {
      const rawResponse = await this.callAIProvider(
        provider,
        config,
        document.content,
        this.analysisPrompt
      );

      const parsed = this.parseAIResponse(rawResponse);
      const suggestedTags = parsed.tags.slice(0, this.resolveMaxTags(options.maxTags));

      // NOTE(review): with Prisma, an `undefined` value in `update` leaves the
      // stored column untouched, so a re-analysis without category/summary
      // presumably keeps the previous values — confirm against the schema.
      const fields = {
        provider,
        model: providerConfig.model,
        suggested_tags: JSON.stringify(suggestedTags),
        suggested_category: parsed.category,
        summary: parsed.summary,
        raw_response: rawResponse,
      };
      await prisma.aIAnalysis.upsert({
        where: { document_id: documentId },
        create: { document_id: documentId, ...fields },
        update: fields,
      });

      return {
        suggested_tags: suggestedTags,
        suggested_category: parsed.category,
        summary: parsed.summary,
        raw_response: rawResponse,
        provider,
        model: providerConfig.model,
      };
    } catch (error) {
      console.error('[AI] 分析失败:', error);
      throw error;
    }
  }

  /**
   * Returns the stored analysis of a document.
   *
   * @returns The stored row with `suggested_tags` decoded from its JSON string,
   *          or `null` when the document has not been analysed.
   * @throws Error when the document is missing or not owned by `userId`.
   */
  static async getAnalysis(documentId: string, userId: string) {
    await this.findOwnedDocument(documentId, userId);

    const analysis = await prisma.aIAnalysis.findUnique({
      where: { document_id: documentId },
    });
    if (!analysis) {
      return null;
    }
    return {
      ...analysis,
      suggested_tags: JSON.parse(analysis.suggested_tags),
    };
  }

  /**
   * Deletes the stored analysis of a document.
   *
   * @throws Error when the document is missing or not owned by `userId`.
   *         Prisma's `delete` also rejects when no analysis row exists.
   */
  static async deleteAnalysis(documentId: string, userId: string) {
    await this.findOwnedDocument(documentId, userId);

    await prisma.aIAnalysis.delete({
      where: { document_id: documentId },
    });
  }

  /**
   * Checks that a provider is reachable by sending a minimal request.
   *
   * Never throws: every failure is reported through the returned object.
   *
   * @param provider Provider to test.
   * @param config   Optional overrides. A provider entry replaces the
   *                 environment-derived entry as a whole; entries are not
   *                 merged field by field, so a caller-supplied URL is never
   *                 combined with the server's own API key.
   */
  static async testProvider(
    provider: AIProviderType,
    config?: Partial<AIConfig>
  ): Promise<{
    success: boolean;
    message: string;
    error?: string;
  }> {
    try {
      const defaults = this.getAIConfig('');
      const fullConfig = config ? { ...defaults, ...config } : defaults;
      const providerConfig = fullConfig[provider];

      // `?.` covers an override that sets the provider entry to `undefined`.
      if (!providerConfig?.apiKey) {
        return {
          success: false,
          message: 'API Key 未配置',
          error: `${provider} API Key 未配置`,
        };
      }

      const messages: ChatMessage[] = [{ role: 'user', content: 'Hi' }];
      if (provider === 'anthropic') {
        await this.callAnthropicAPI(providerConfig, messages);
      } else {
        await this.callOpenAICompatibleAPI(providerConfig, messages);
      }

      return {
        success: true,
        message: `${provider} 连接成功`,
      };
    } catch (error) {
      return {
        success: false,
        message: '连接失败',
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }
}