# MineNASAI - PoC 验证计划 **创建日期**: 2025-02-04 **目标**: 验证3个关键技术的可行性 **预计时间**: 2-3天 --- ## 验证目标 ### 为什么需要 PoC? 在投入大量时间开发之前,我们需要验证以下关键技术的可行性: 1. **Claude Code CLI 集成** (最高优先级) - 风险:子进程管理、输出解析复杂 - 影响:这是核心功能,如果不可行需要重新设计 2. **智能路由算法** - 风险:路由准确度不足 - 影响:影响用户体验和资源利用率 3. **MCP Server 加载** - 风险:MCP协议不熟悉、进程通信复杂 - 影响:工具扩展能力的基础 --- ## PoC 1: Claude Code CLI 集成 ### 目标 验证可以通过 Python 子进程调用 Claude Code CLI,并正确解析其输出。 ### 验证内容 - [ ] 子进程启动和管理 - [ ] 实时输出流捕获 - [ ] ANSI 转义序列处理 - [ ] 交互式输入处理 - [ ] 超时和资源限制 - [ ] 错误处理和恢复 ### 实施步骤 #### 步骤1: 环境准备 ```bash # 1. 确保已安装 Claude Code CLI # 如果没有,访问: https://docs.anthropic.com/claude/docs/claude-code # 2. 验证 CLI 可用 claude --version # 3. 测试基本调用 claude "print hello world in python" # 4. 创建 PoC 工作目录 mkdir -p poc/claude_cli_test cd poc/claude_cli_test ``` #### 步骤2: 基础子进程调用 (poc_1_basic.py) ```python """PoC 1.1: Basic Claude CLI subprocess call.""" import subprocess import sys def test_basic_call(): """测试基本的 CLI 调用.""" print("=== PoC 1.1: Basic CLI Call ===\n") # 简单命令 cmd = ["claude", "print hello world in python"] print(f"执行命令: {' '.join(cmd)}") print("-" * 60) try: # 使用 subprocess.run result = subprocess.run( cmd, capture_output=True, text=True, timeout=30, ) print("STDOUT:") print(result.stdout) print("\nSTDERR:") print(result.stderr) print(f"\n退出码: {result.returncode}") if result.returncode == 0: print("\n✅ 基础调用成功!") return True else: print("\n❌ 命令执行失败") return False except subprocess.TimeoutExpired: print("\n❌ 命令超时") return False except Exception as e: print(f"\n❌ 异常: {e}") return False if __name__ == "__main__": success = test_basic_call() sys.exit(0 if success else 1) ``` **验收标准**: - [ ] 脚本可以成功调用 Claude CLI - [ ] 可以捕获完整输出 - [ ] 超时机制生效 **运行测试**: ```bash python poc_1_basic.py ``` --- #### 步骤3: 实时输出流捕获 (poc_1_streaming.py) ```python """PoC 1.2: Real-time output streaming.""" import subprocess import sys import select import os def test_streaming_output(): """测试实时输出流捕获.""" print("=== PoC 1.2: Streaming Output ===\n") cmd = ["claude", "count to 10 with 1 second delays in python"] print(f"执行命令: {' '.join(cmd)}") print("-" * 60) try: # 使用 Popen 进行流式输出 process = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1, # 行缓冲 ) print("实时输出:") # 实时读取输出 while True: # 检查进程是否结束 if process.poll() is not None: break # 读取一行输出 line = process.stdout.readline() if line: print(f"[OUT] {line.rstrip()}") # 读取错误输出 # 注意: stderr 需要非阻塞读取或使用 select # 获取剩余输出 remaining_out, remaining_err = process.communicate() if remaining_out: print(remaining_out) print(f"\n退出码: {process.returncode}") if process.returncode == 0: print("\n✅ 流式输出捕获成功!") return True else: print("\n❌ 进程执行失败") return False except Exception as e: print(f"\n❌ 异常: {e}") return False if __name__ == "__main__": success = test_streaming_output() sys.exit(0 if success else 1) ``` **验收标准**: - [ ] 可以实时捕获输出(不是等待结束) - [ ] 输出不会丢失 - [ ] 可以正确检测进程结束 **运行测试**: ```bash python poc_1_streaming.py ``` --- #### 步骤4: PTY 伪终端处理 (poc_1_pty.py) ```python """PoC 1.3: PTY pseudo-terminal for interactive handling.""" import pty import os import sys import select import subprocess def test_pty_terminal(): """测试 PTY 伪终端处理.""" print("=== PoC 1.3: PTY Terminal ===\n") cmd = ["claude", "create a simple python calculator"] print(f"执行命令: {' '.join(cmd)}") print("-" * 60) try: # 创建 PTY master, slave = pty.openpty() # 启动进程 process = subprocess.Popen( cmd, stdin=slave, stdout=slave, stderr=slave, close_fds=True, ) os.close(slave) # 子进程已继承,关闭父进程的副本 print("实时输出 (PTY):") output_buffer = [] # 读取输出 while True: # 检查进程是否结束 if process.poll() is not None: break # 使用 select 进行非阻塞读取 r, w, e = select.select([master], [], [], 0.1) if master in r: try: data = os.read(master, 1024) if data: text = data.decode('utf-8', errors='replace') print(text, end='') output_buffer.append(text) except OSError: break os.close(master) print(f"\n\n退出码: {process.returncode}") print(f"总输出长度: {len(''.join(output_buffer))} 字符") if process.returncode == 0: print("\n✅ PTY 终端处理成功!") return True else: print("\n❌ 进程执行失败") return False except Exception as e: print(f"\n❌ 异常: {e}") import traceback traceback.print_exc() return False if __name__ == "__main__": success = test_pty_terminal() sys.exit(0 if success else 1) ``` **验收标准**: - [ ] PTY 可以正确创建 - [ ] 可以处理交互式输出 - [ ] ANSI 转义序列正确显示 **运行测试**: ```bash python poc_1_pty.py ``` --- #### 步骤5: 完整集成示例 (poc_1_full.py) ```python """PoC 1.4: Full Claude CLI integration.""" import asyncio import pty import os import select import subprocess from dataclasses import dataclass from typing import Optional @dataclass class CLIResult: """CLI execution result.""" exit_code: int output: str error: Optional[str] = None duration_ms: int = 0 class ClaudeCLI: """Claude CLI wrapper.""" def __init__(self, timeout: int = 300): """Initialize CLI wrapper.""" self.timeout = timeout async def execute(self, prompt: str, workspace: str = ".") -> CLIResult: """ Execute Claude CLI command. Args: prompt: 任务提示 workspace: 工作目录 Returns: CLIResult with output """ import time start_time = time.time() cmd = ["claude", prompt] try: # 创建 PTY master, slave = pty.openpty() # 启动进程 process = subprocess.Popen( cmd, stdin=slave, stdout=slave, stderr=slave, cwd=workspace, close_fds=True, ) os.close(slave) # 收集输出 output_buffer = [] # 读取输出 (带超时) timeout_time = start_time + self.timeout while True: # 检查超时 if time.time() > timeout_time: process.kill() raise TimeoutError(f"Command timeout after {self.timeout}s") # 检查进程是否结束 if process.poll() is not None: break # 非阻塞读取 r, w, e = select.select([master], [], [], 0.1) if master in r: try: data = os.read(master, 4096) if data: text = data.decode('utf-8', errors='replace') output_buffer.append(text) except OSError: break os.close(master) duration_ms = int((time.time() - start_time) * 1000) output = ''.join(output_buffer) return CLIResult( exit_code=process.returncode, output=output, duration_ms=duration_ms, ) except TimeoutError as e: return CLIResult( exit_code=-1, output=''.join(output_buffer), error=str(e), duration_ms=int((time.time() - start_time) * 1000), ) except Exception as e: return CLIResult( exit_code=-1, output='', error=str(e), duration_ms=int((time.time() - start_time) * 1000), ) async def test_full_integration(): """测试完整集成.""" print("=== PoC 1.4: Full Integration ===\n") cli = ClaudeCLI(timeout=60) # 测试用例1: 简单任务 print("测试1: 简单Python脚本") print("-" * 60) result = await cli.execute("create a hello world python script") print(f"输出:\n{result.output}") print(f"\n退出码: {result.exit_code}") print(f"耗时: {result.duration_ms}ms") if result.exit_code == 0: print("✅ 测试1通过") else: print(f"❌ 测试1失败: {result.error}") return False # 测试用例2: 需要多步骤的任务 print("\n\n测试2: 多步骤任务") print("-" * 60) result = await cli.execute( "create a simple REST API with FastAPI that has a hello endpoint" ) print(f"输出:\n{result.output[:500]}...") # 只显示前500字符 print(f"\n退出码: {result.exit_code}") print(f"耗时: {result.duration_ms}ms") if result.exit_code == 0: print("✅ 测试2通过") else: print(f"❌ 测试2失败: {result.error}") return False print("\n" + "=" * 60) print("✅ 完整集成验证通过!") print("=" * 60) return True if __name__ == "__main__": success = asyncio.run(test_full_integration()) import sys sys.exit(0 if success else 1) ``` **验收标准**: - [ ] 封装类可以正常工作 - [ ] 支持异步调用 - [ ] 超时控制有效 - [ ] 错误处理完善 - [ ] 可以处理简单和复杂任务 **运行测试**: ```bash python poc_1_full.py ``` --- ### PoC 1 总结 完成上述5个步骤后,填写以下清单: **验证结果**: - [ ] 基础调用可行 - [ ] 流式输出捕获可行 - [ ] PTY终端处理可行 - [ ] 完整集成验证通过 - [ ] 性能可接受 (大多数任务 < 1分钟) **发现的问题**: 1. 2. 3. **需要的改进**: 1. 2. 3. **结论**: - [ ] ✅ 可行,建议使用 PTY + asyncio 方案 - [ ] ⚠️ 部分可行,需要调整设计 - [ ] ❌ 不可行,需要备选方案 --- ## PoC 2: 智能路由算法 ### 目标 验证路由算法可以合理地将任务分类为 fast/medium/deep 三种模式。 ### 验证内容 - [ ] 启发式规则准确度 - [ ] LLM-based 路由可行性 - [ ] 用户指令覆盖 - [ ] 路由性能 (响应时间) ### 实施步骤 #### 步骤1: 启发式规则测试 (poc_2_heuristic.py) ```python """PoC 2.1: Heuristic routing rules.""" from dataclasses import dataclass from typing import Literal @dataclass class RoutingDecision: """Routing decision.""" mode: Literal["fast", "medium", "deep"] reason: str confidence: float # 0.0 - 1.0 class HeuristicRouter: """Heuristic-based router.""" def __init__(self): """Initialize rules.""" self.rules = { "fast_keywords": ["状态", "查询", "搜索", "是什么", "?", "?"], "deep_keywords": ["实现", "开发", "编写", "重构", "优化", "设计", "创建"], "medium_keywords": ["修改", "更新", "添加", "删除"], } def evaluate(self, message: str) -> RoutingDecision: """Evaluate routing decision.""" message_lower = message.lower() length = len(message) # 规则1: 短消息 + 查询关键词 -> fast if length < 50 and any(kw in message_lower for kw in self.rules["fast_keywords"]): return RoutingDecision( mode="fast", reason="短消息且包含查询关键词", confidence=0.9, ) # 规则2: 包含开发关键词 -> deep deep_count = sum(1 for kw in self.rules["deep_keywords"] if kw in message_lower) if deep_count >= 1: return RoutingDecision( mode="deep", reason=f"包含 {deep_count} 个开发关键词", confidence=0.7 + min(deep_count * 0.1, 0.2), ) # 规则3: 中等长度 + 修改关键词 -> medium if 50 <= length <= 200 and any(kw in message_lower for kw in self.rules["medium_keywords"]): return RoutingDecision( mode="medium", reason="中等长度且包含修改关键词", confidence=0.6, ) # 规则4: 长消息 -> medium/deep if length > 200: return RoutingDecision( mode="medium", reason="消息较长,可能需要多步处理", confidence=0.5, ) # 默认: medium return RoutingDecision( mode="medium", reason="默认中等复杂度", confidence=0.4, ) def test_heuristic_router(): """测试启发式路由.""" print("=== PoC 2.1: Heuristic Router ===\n") router = HeuristicRouter() # 测试用例 test_cases = [ ("NAS状态?", "fast"), ("搜索最新的Python教程", "fast"), ("实现一个Web服务", "deep"), ("重构这个模块", "deep"), ("修改配置文件中的端口", "medium"), ("添加一个新的API端点", "medium"), ("这是一段很长的描述,我需要你帮我分析一下这个系统的架构,然后给出优化建议,包括性能、安全性和可维护性等多个方面", "medium"), ] correct = 0 total = len(test_cases) print(f"测试用例数: {total}\n") for i, (message, expected) in enumerate(test_cases, 1): decision = router.evaluate(message) is_correct = decision.mode == expected correct += is_correct status = "✅" if is_correct else "❌" print(f"{status} 用例 {i}:") print(f" 消息: {message}") print(f" 预期: {expected}") print(f" 实际: {decision.mode} (置信度: {decision.confidence:.2f})") print(f" 理由: {decision.reason}") print() accuracy = correct / total print("=" * 60) print(f"准确率: {accuracy:.1%} ({correct}/{total})") print("=" * 60) if accuracy >= 0.7: print("\n✅ 启发式规则准确度可接受 (>= 70%)") return True else: print(f"\n⚠️ 准确度较低 ({accuracy:.1%}), 建议优化规则或使用LLM") return False if __name__ == "__main__": import sys success = test_heuristic_router() sys.exit(0 if success else 1) ``` **验收标准**: - [ ] 准确率 >= 70% - [ ] 置信度评分合理 - [ ] 响应时间 < 10ms **运行测试**: ```bash python poc_2_heuristic.py ``` --- #### 步骤2: LLM 路由测试 (poc_2_llm.py) ```python """PoC 2.2: LLM-based routing.""" import asyncio import os from anthropic import AsyncAnthropic class LLMRouter: """LLM-based router using Claude Haiku.""" def __init__(self, api_key: str): """Initialize with API key.""" self.client = AsyncAnthropic(api_key=api_key) self.model = "claude-3-5-haiku-20241022" async def evaluate(self, message: str) -> dict: """Evaluate using Claude Haiku.""" system_prompt = """ 你是一个智能路由助手。分析用户消息,判断任务复杂度: - **fast**: 简单查询,不需要复杂工具 (<1000 tokens) 例如: "NAS状态?", "搜索xxx", "是什么" - **medium**: 中等任务,需要少量工具调用 (<5000 tokens) 例如: "修改配置", "添加功能", "更新文档" - **deep**: 复杂任务,需要编程或多步骤处理 (>5000 tokens) 例如: "实现xxx", "重构xxx", "设计xxx" 返回 JSON 格式: { "mode": "fast|medium|deep", "reason": "判断理由", "confidence": 0.0-1.0 } """ try: response = await self.client.messages.create( model=self.model, max_tokens=200, system=system_prompt, messages=[ {"role": "user", "content": f"分析这个任务: {message}"} ] ) # 解析响应 import json text = response.content[0].text # 尝试提取JSON if "{" in text and "}" in text: json_str = text[text.find("{"):text.rfind("}")+1] result = json.loads(json_str) return result else: # 降级到启发式 return {"mode": "medium", "reason": "LLM解析失败", "confidence": 0.5} except Exception as e: print(f"LLM路由失败: {e}") return {"mode": "medium", "reason": f"错误: {e}", "confidence": 0.0} async def test_llm_router(): """测试LLM路由.""" print("=== PoC 2.2: LLM Router ===\n") # 检查API密钥 api_key = os.getenv("ANTHROPIC_API_KEY") if not api_key: print("⚠️ 未设置 ANTHROPIC_API_KEY,跳过LLM测试") return True router = LLMRouter(api_key) # 测试用例 test_cases = [ ("NAS状态?", "fast"), ("实现一个Web服务", "deep"), ("修改配置文件中的端口", "medium"), ] print(f"测试用例数: {len(test_cases)}\n") for i, (message, expected) in enumerate(test_cases, 1): print(f"用例 {i}: {message}") result = await router.evaluate(message) is_correct = result["mode"] == expected status = "✅" if is_correct else "❌" print(f" {status} 预期: {expected}, 实际: {result['mode']}") print(f" 理由: {result.get('reason', 'N/A')}") print(f" 置信度: {result.get('confidence', 0):.2f}") print() print("✅ LLM路由可行(需要API费用)") return True if __name__ == "__main__": success = asyncio.run(test_llm_router()) import sys sys.exit(0 if success else 1) ``` **验收标准**: - [ ] LLM可以正确分类 - [ ] 响应时间可接受 (< 2秒) - [ ] API费用在预算内 **运行测试**: ```bash export ANTHROPIC_API_KEY="your-api-key" python poc_2_llm.py ``` --- ### PoC 2 总结 **验证结果**: - [ ] 启发式规则准确度: ____% - [ ] LLM路由准确度: ____% - [ ] LLM平均响应时间: ___ ms - [ ] 单次LLM路由成本: $_____ **结论**: - [ ] ✅ 启发式规则足够,先用规则 - [ ] ✅ 启发式 + LLM混合(低置信度时调用LLM) - [ ] ⚠️ 仅使用LLM(成本和延迟较高) --- ## PoC 3: MCP Server 加载 ### 目标 验证可以动态加载和调用 MCP Server。 ### 验证内容 - [ ] MCP Server 发现和启动 - [ ] MCP 协议通信 (stdio/sse) - [ ] 工具调用和结果解析 - [ ] 进程生命周期管理 ### 实施步骤 #### 步骤1: 基础MCP连接 (poc_3_basic.py) ```python """PoC 3.1: Basic MCP server connection.""" import asyncio import subprocess import json async def test_mcp_connection(): """测试 MCP Server 连接.""" print("=== PoC 3.1: MCP Connection ===\n") # 使用官方 filesystem MCP server cmd = ["npx", "-y", "@modelcontextprotocol/server-filesystem", "/tmp"] print(f"启动 MCP Server: {' '.join(cmd)}") print("-" * 60) try: # 启动进程 process = subprocess.Popen( cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) # 发送 initialize 请求 initialize_request = { "jsonrpc": "2.0", "id": 1, "method": "initialize", "params": { "protocolVersion": "0.1.0", "capabilities": {}, "clientInfo": { "name": "poc-test", "version": "0.1.0" } } } print("发送 initialize 请求...") process.stdin.write(json.dumps(initialize_request) + "\n") process.stdin.flush() # 读取响应 response_line = process.stdout.readline() response = json.loads(response_line) print(f"收到响应: {json.dumps(response, indent=2)}") if "result" in response: print("\n✅ MCP 连接成功!") print(f"Server capabilities: {response['result'].get('capabilities', {})}") # 清理 process.terminate() process.wait(timeout=5) return True else: print(f"\n❌ 初始化失败: {response.get('error')}") process.terminate() return False except Exception as e: print(f"\n❌ 异常: {e}") import traceback traceback.print_exc() return False if __name__ == "__main__": success = asyncio.run(test_mcp_connection()) import sys sys.exit(0 if success else 1) ``` **验收标准**: - [ ] MCP Server可以启动 - [ ] initialize握手成功 - [ ] 可以正确解析响应 **运行测试**: ```bash python poc_3_basic.py ``` --- #### 步骤2: 工具调用测试 (poc_3_tools.py) ```python """PoC 3.2: MCP tool calling.""" import asyncio import subprocess import json async def test_mcp_tools(): """测试 MCP 工具调用.""" print("=== PoC 3.2: MCP Tools ===\n") cmd = ["npx", "-y", "@modelcontextprotocol/server-filesystem", "/tmp"] try: process = subprocess.Popen( cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) # Initialize initialize_request = { "jsonrpc": "2.0", "id": 1, "method": "initialize", "params": { "protocolVersion": "0.1.0", "capabilities": {}, "clientInfo": {"name": "poc-test", "version": "0.1.0"} } } process.stdin.write(json.dumps(initialize_request) + "\n") process.stdin.flush() response = json.loads(process.stdout.readline()) print(f"✓ Initialized") # List tools list_tools_request = { "jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": {} } print("\n列出可用工具...") process.stdin.write(json.dumps(list_tools_request) + "\n") process.stdin.flush() response = json.loads(process.stdout.readline()) if "result" in response: tools = response["result"].get("tools", []) print(f"✓ 找到 {len(tools)} 个工具:") for tool in tools: print(f" - {tool['name']}: {tool.get('description', 'N/A')}") # 测试调用一个工具 (read_file) if tools: print("\n测试调用工具...") # 创建测试文件 test_file = "/tmp/mcp_test.txt" with open(test_file, "w") as f: f.write("Hello MCP!") call_tool_request = { "jsonrpc": "2.0", "id": 3, "method": "tools/call", "params": { "name": "read_file", "arguments": {"path": test_file} } } process.stdin.write(json.dumps(call_tool_request) + "\n") process.stdin.flush() response = json.loads(process.stdout.readline()) if "result" in response: print(f"✓ 工具调用成功: {response['result']}") print("\n✅ MCP 工具调用验证通过!") process.terminate() return True else: print(f"❌ 工具调用失败: {response.get('error')}") process.terminate() return False except Exception as e: print(f"\n❌ 异常: {e}") import traceback traceback.print_exc() return False if __name__ == "__main__": success = asyncio.run(test_mcp_tools()) import sys sys.exit(0 if success else 1) ``` **验收标准**: - [ ] 可以列出工具 - [ ] 可以调用工具 - [ ] 结果解析正确 **运行测试**: ```bash python poc_3_tools.py ``` --- ### PoC 3 总结 **验证结果**: - [ ] MCP Server 可以启动 - [ ] 协议通信正常 - [ ] 工具调用成功 - [ ] 进程管理可控 **发现的问题**: 1. 2. **结论**: - [ ] ✅ MCP 集成可行 - [ ] ⚠️ 需要解决的问题: - [ ] ❌ 不可行,需要替代方案 --- ## 总体结论 完成所有 PoC 后,填写总体评估: ### PoC 验证总结 | PoC | 状态 | 结论 | 备注 | |-----|------|------|------| | Claude CLI 集成 | ⏸️ | - | - | | 智能路由算法 | ⏸️ | - | - | | MCP Server 加载 | ⏸️ | - | - | ### 风险评估更新 原有风险的验证结果: 1. **Claude CLI集成复杂** (原风险等级: 高) - PoC 结果: ____ - 新风险等级: ____ - 建议: ____ 2. **智能路由效果不佳** (原风险等级: 中) - PoC 结果: ____ - 新风险等级: ____ - 建议: ____ 3. **MCP Server不稳定** (原风险等级: 中) - PoC 结果: ____ - 新风险等级: ____ - 建议: ____ ### 下一步建议 基于 PoC 结果: - [ ] ✅ 所有验证通过,可以开始 Phase 0 正式开发 - [ ] ⚠️ 部分通过,需要调整设计后开发 - [ ] ❌ 关键技术不可行,需要重新规划 具体行动: 1. 2. 3. --- **文档结束**