commit bae47a241171758ae1d73d8733886a3fab072114 Author: congsh Date: Fri Jun 12 16:04:03 2026 +0800 feat: 修复代码审核报告问题 diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d149df8 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +.env +.env.local +.venv/ +venv/ +*.egg-info/ +.pytest_cache/ +.mypy_cache/ +data/ +*.db +.DS_Store diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..9a401e4 --- /dev/null +++ b/.env.example @@ -0,0 +1,42 @@ +# RSSKeeper 连接地址 +RSSKEEPER_BASE_URL=http://localhost:7329 + +# LLM API(兼容 OpenAI 格式) +OPENAI_API_KEY=sk-xxx +OPENAI_BASE_URL=https://api.openai.com/v1 +OPENAI_MODEL=gpt-4o-mini +OPENAI_TIMEOUT=60 +OPENAI_MAX_RETRIES=3 + +# dataClean 数据目录 +DATA_DIR=/app/data +DATABASE_URL=/app/data/dataclean.db + +# 简报输出目录 +BRIEF_OUTPUT_DIR=/app/data/briefs + +# 调度时间(分钟) +SUMMARIZE_INTERVAL_MINUTES=60 +TAG_SCORE_INTERVAL_MINUTES=1440 +DAILY_BRIEF_HOUR=8 +DAILY_BRIEF_MINUTE=0 + +# 去重阈值 +TITLE_SIMILARITY_THRESHOLD=0.85 +CONTENT_SIMILARITY_THRESHOLD=0.80 + +# 摘要长度 +MAX_AI_SUMMARY_LENGTH=300 +MIN_ORIGINAL_SUMMARY_LENGTH=100 + +# 每篇简报每个分类显示文章数 +BRIEF_TOP_N_PER_CATEGORY=10 + +# 日志级别 +LOG_LEVEL=INFO + +# Web UI / API 安全(生产环境务必设置) +# 为空时不启用 API Token 鉴权,仅建议在内网使用 +API_TOKEN= +# CORS 允许来源,逗号分隔;生产环境请填写具体域名 +CORS_ALLOWED_ORIGINS= diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b8dddb1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,53 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# 虚拟环境 +.venv/ +venv/ +ENV/ +env/ + +# 环境配置 +.env +.env.local + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# 测试与缓存 +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ + +# 前端 +frontend/node_modules/ +frontend/dist/ + +# 数据目录 +data/ +*.db + +# 系统文件 +.DS_Store diff --git a/CODE_REVIEW.md b/CODE_REVIEW.md new file mode 100644 
index 0000000..f704445 --- /dev/null +++ b/CODE_REVIEW.md @@ -0,0 +1,459 @@ +# dataClean 代码审核报告 + +> 审核日期:2026-06-12 +> 审核范围:后端(FastAPI + SQLAlchemy + APScheduler) / 前端(Vue 3 + Element Plus) / 配置与部署 +> 审核人:opencode + +## 项目概览 + +- **技术栈**:FastAPI 0.115 + SQLAlchemy 2.0 + SQLite + APScheduler 3.10(后端) / Vue 3.4 + Element Plus 2.6 + Vite 5(前端) / OpenAI 兼容 LLM +- **代码规模**:约 1.5k 行 Python + 1.2k 行 Vue +- **目标**:从 rssKeeper 拉取文章,做摘要/分类/打分/去重/简报生成,提供 Web UI +- **整体评价**:模块化清晰、`README.md` 完整可读,但存在安全、性能与正确性方面的隐患。 + +--- + +## 审核结论一览 + +| 严重等级 | 数量 | 含义 | +|----------|------|------| +| 🔴 严重 | 7 | 影响线上数据安全与正确性,上线前必须修复 | +| 🟡 中等 | 13 | 影响可维护性、时序正确性、可观测性,建议近期修复 | +| 🟢 轻量 | 10 | 代码风格、健壮性细节,可持续改进 | + +--- + +## 🔴 严重问题(上线前必须修复) + +### 1. CORS 配置错误且过于宽松 + +**文件**:`main.py:72-78` + +```python +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +``` + +- `allow_origins=["*"]` 与 `allow_credentials=True` 同时启用被 Starlette 视为非法组合。 +- 后端无任何鉴权(见 #2),任何网站都能通过浏览器代表"已登录用户"调用 API。 + +**建议**:生产环境收敛到具体域名,关闭 credentials,或删除 CORS(Web UI 走同源代理)。 + +--- + +### 2. 后端 API 无任何鉴权 + +所有接口(`/api/settings`、`/api/tasks/summarize`、`/api/taxonomy/bootstrap?force=true`)公开可访问: + +- `Settings.vue:24-35` 可在 Web UI 直接改写 LLM API Key。 +- `Tasks.vue:18-26` 可未经授权立即触发高额 LLM 调用。 +- 两者叠加,**任何能访问 7331 端口的访客都能改 key、消耗 token**。 + +**建议**:反代层加 BasicAuth,或在 `main.py` 加 `Depends(verify_token)`。 + +--- + +### 3. 
去重任务破坏历史数据 + +**文件**:`app/deduplicator.py:146-152` + +```python +old_groups = db.query(DuplicateGroup).all() # 拉取全部 +for og in old_groups: + for art in og.articles: + art.duplicate_group_id = None + art.is_representative = False + db.delete(og) +db.commit() +``` + +去重仅按"当天"过滤文章(line 158-165),但**清空阶段删除的是所有日期的 `DuplicateGroup`**,且把历史上所有文章的 `is_representative` 重置为 `False`。 + +- 后果:每日 8:00 简报生成后,**所有历史文章的重复组信息都被清空**。 +- `brief.py:99-106` 依靠 `is_representative=True OR duplicate_group_id IS NULL` 取代表文章,缺一会导致简报里出现全部 N 篇文章。 + +**建议**:只删除 `representative_article_id` 属于当天文章的去重组,或在 `DuplicateGroup` 上加 `brief_date` 字段。 + +--- + +### 4. `_with_db` 装饰器静默吞掉所有异常 + +**文件**:`scheduler.py:40-51` + +```python +except Exception as exc: + logger.error("定时任务 %s 执行失败: %s", func.__name__, exc) +``` + +任务失败仅有日志,**没有**: +- 任务状态持久化(前端无法知道哪些任务最近失败过)。 +- 告警 / 通知。 +- 失败指标(Prometheus 等)。 + +如果 LLM 配额耗尽或 rssKeeper 挂掉,**服务会假装正常跑了 N 天**。 + +**建议**:建 `JobRunLog` 表记录 `(job_id, start, end, status, error)`,或在 Web UI 暴露上次运行结果。 + +--- + +### 5. 手动任务与定时任务可并发执行 + +**文件**:`main.py:248-267`、`scheduler.py:104-133` + +`max_instances=1` 仅对 APScheduler 注册的实例生效,不约束 `POST /api/tasks/summarize`。一旦同时执行,`fetch_and_summarize` 内部有重复 `commit()`,可能引发 unique 约束冲突或写脏数据。 + +**建议**:在 `main.py` 用全局 `threading.Lock` 包裹任务函数。 + +--- + +### 6. 去重算法 O(n²) 性能 + +**文件**:`app/deduplicator.py:88-113` + +对 `n` 篇文章做 BFS 嵌套循环,每对调用 `SequenceMatcher`(也是 O(L²))。200 篇时是 4 万次 `SequenceMatcher` + TF-IDF 矩阵计算,**单日任务常常跑 5–10 分钟**。 + +**建议**: +- 标题长度 hash → 桶聚类后再做 pair 比较(minhash / LSH 更佳)。 +- 内容相似度先按 TF-IDF 矩阵做阈值筛选 top-K,再做精确比较。 + +--- + +### 7. Dockerfile 以 root 运行且未指定 USER + +**文件**:`Dockerfile:10-26` + +`FROM python:3.12-slim` 后未建非 root 用户,gunicorn/uvicorn 全部以 root 跑。一旦 Web 漏洞被利用,攻击者直接拿到容器 root。 + +**建议**: + +```dockerfile +RUN useradd --create-home --uid 1000 app +USER app +``` + +--- + +## 🟡 中等问题(影响正确性 / 可维护性) + +### 8. 
时区处理混乱 + +- `scheduler.py:35` 用 `timezone="Asia/Shanghai"`。 +- `scorer.py:49`、`brief.py:73` 等都用 `datetime.utcnow()`。 +- `summarizer.py:86` 把 ISO 时间解析为带 tzinfo,但 `scorer.py:55-58` 又 `replace(tzinfo=None)` 强行丢掉。 + +`score_articles` 内部用 UTC 当前时间,`_freshness_score` 在 24 小时分界点附近会因 tzinfo 一致性问题差几个小时。 + +**建议**:统一用 `datetime.now(timezone.utc)` 持久化,明确表里存的时区。 + +--- + +### 9. `datetime.utcnow()` 已被弃用 + +Python 3.12+ 标注 `datetime.utcnow()` 为 deprecated。 + +涉及文件: +- `models.py:25,45` +- `summarizer.py:137` +- `scorer.py:49` +- `brief.py:73,154` +- `settings_manager.py:98` + +**建议**:替换为 `datetime.now(timezone.utc)`。 + +--- + +### 10. 重复性分数公式与文档不符 + +**文件**:`app/scorer.py:83-91` + `deduplicator.py:194` + +```python +member_ids = [unique_articles[i].id for i in cluster] # 包含代表,最少 2 +... +dup_count = max(len(group.member_article_ids), 1) # >= 2 +compute_duplication_score(2) -> 25.0 # 不是 0 +``` + +注释说 "1 次为 0 分",实际最小是 2,永远不会得 0。 + +**建议**:用 `len(member_article_ids) - 1`(非代表成员数),或调整公式。 + +--- + +### 11. 标签筛选性能差且语义不严谨 + +**文件**:`main.py:179-180` + +```python +if tag: + query = query.filter(EnrichedArticle.tags.contains([tag])) +``` + +SQLAlchemy 会把整个 JSON 列 `json.dumps` 后做字符串包含比较,**无法走索引**。表大时会全表扫描,且若文章有 `["人工智能"]`,匹配 "人工" 也会命中。 + +**建议**:建关联表 `article_tags(article_id, tag_name)`,或使用 SQLite JSON 函数 `json_each`。 + +--- + +### 12. Pydantic v1 风格 Config + +**文件**:`main.py:99-125` + +```python +class Config: + from_attributes = True +``` + +应改为 Pydantic v2 风格: + +```python +model_config = ConfigDict(from_attributes=True) +``` + +并需 `from pydantic import ConfigDict`。`ArticleOut.tags: list` 也应改为 `List[str]`,否则对 SQLAlchemy JSON 列不会做反序列化。 + +--- + +### 13. `_with_db` 装饰器未保留元信息 + +**文件**:`scheduler.py:40-51` + +手写 `wrapper.__name__ = func.__name__`,但缺 `__doc__`、`__wrapped__`。改用 `@functools.wraps(func)` 更标准。 + +--- + +### 14. 
前端串行保存 17 个配置项 + +**文件**:`Settings.vue:68-80` + +```js +for (const item of settings.value) { + await datacleanApi.updateSetting(item.key, item.value) +} +``` + +17 个 PUT 串行,任何一个失败就中断且不提示哪些失败。 + +**建议**:后端加 `PUT /api/settings` 批量接口;前端用 `Promise.allSettled` 或事务式调用。 + +--- + +### 15. 分页 total 是 hack + +**文件**:`Articles.vue:108` + +```js +pagination.total = res.length === pagination.size + ? pagination.page * pagination.size + 1 + : (pagination.page - 1) * pagination.size + res.length +``` + +`+1` 是为了让 el-pagination 多显示一页按钮的粗暴 hack,**末页判断会出错**(恰好填满时 total 比真实多 1)。 + +**建议**:后端响应里加 `total` 字段(`/api/articles` 改为 `{items, total}`),前端用真实 total。 + +--- + +### 16. 缺数据库迁移 + +`database.py:34-35` 仅 `Base.metadata.create_all`: + +- 加列(如 `EnrichedArticle.is_hidden`)会无报错地忽略。 +- 类型变更(`String(128)` → `String(256)`)会保留旧列。 +- 删字段不会清理。 + +**建议**:引入 Alembic,至少 `alembic init` 起一个 baseline。 + +--- + +### 17. `_normalize_title` 字符范围偏窄 + +**文件**:`deduplicator.py:23` + +```python +title = re.sub(r"[^\w一-鿿]", " ", title) +``` + +- `\w` 不含中文,逻辑可接受。 +- 鿿是 U+9FFF,**U+A000–U+FFFF 之间的生僻字 / 部首扩展区 B 字符会被误删**。可用 `[\u4e00-\u9fff]` 或 Python `regex` 库的 `\p{Han}`。 + +--- + +### 18. Docker 构建镜像源硬编码 + +**文件**:`Dockerfile:5,20` + +- `npmmirror.com` 镜像在国内可用,海外构建会慢或超时。 +- `tuna.tsinghua.edu.cn` 同上。 + +**建议**:用 `ARG REGISTRY_MIRROR=...` + `--build-arg` 注入,或在 CI/海外构建时覆盖。 + +--- + +### 19. LLM 客户端无 token 计数 / 限流 + +`ai_client.py` 每次失败抛异常就完事。`fetch_and_summarize`(`summarizer.py:139-143`)对每篇文章都重试,没有: +- 失败后 cooldown。 +- Token 用量统计。 +- 限速(OpenAI tier 限流会导致 429)。 + +**建议**:加 `tenacity` 做指数退避、记录 429 重试、保存 token 消耗日志。 + +--- + +### 20. 
`_get_env_default` 强转字符串丢失类型 + +**文件**:`settings_manager.py:36-39` + +```python +return str(value) if value is not None else "" +``` + +`OPENAI_TIMEOUT=60` 写入数据库变成 `"60"`,再 `apply_db_settings_to_config` 里 `int(db_value)` 还原——逻辑 OK,**但**如果用户直接编辑 DB 写入非数字字符串,启动时 `apply_db_settings_to_config` 会捕获失败(`logger.warning` 不会中断),**线上的 `settings.OPENAI_TIMEOUT` 仍是默认值**,行为不可见。 + +**建议**:失败时启动失败或返回 HTTP 503 明确告知。 + +--- + +## 🟢 轻量问题(可优化) + +### 21. 前端无错误边界 + +`App.vue` 没 `errorCaptured`,任一视图抛错都白屏。 + +### 22. 测试覆盖度不足 + +- `test_deduplicator.py` 测了单簇简单情况,但未覆盖: + - 跨日期去重 + - URL 重复但内容不同 + - 大簇(>5 篇) + - `deduplicate_articles` 中 `old_groups` 清空逻辑(**这是严重 bug**) +- `test_scorer.py` 没测 `_freshness_score`。 +- 没有 `test_taxonomy.py`、`test_summarizer.py`、`test_brief.py`、`test_settings_manager.py`。 +- 没有 HTTP 接口测试(`fastapi.testclient`)。 + +### 23. 日志可观测性 + +仅 `logging.basicConfig` 文本格式,**没有 request_id、没有结构化字段**。多 worker 时难以追踪。 + +### 24. `config.py:60` 路径创建副作用 + +`@property database_path` 在 `Settings()` 实例化时 `mkdir`,导入 `config` 就改文件系统。**测试或 CLI 工具 import 该模块就会创建目录**。 + +**建议**:把目录创建放到 `database.init_db()` 里。 + +### 25. `feed_category` 字段名耦合假设 + +**文件**:`summarizer.py:96` + +假设 rssKeeper 返回字段 `category`,但 README 没写明 rssKeeper 接口契约。应加注释或 Pydantic 模型校验。 + +### 26. 简报输出目录嵌套过深 + +**文件**:`brief.py:130` + +写到 `BRIEF_OUTPUT_DIR/2024-01-01/daily-brief.md`,日期子目录无必要。 + +### 27. 静态文件兜底逻辑奇怪 + +**文件**:`main.py:330-338` + +```python +if not os.path.isdir(static_dir): + frontend_dist = os.path.join(os.path.dirname(__file__), "frontend", "dist") + if os.path.isdir(frontend_dist): + static_dir = frontend_dist +``` + +- 本地开发用 `npm run dev` 走 Vite 代理,**`frontend/dist` 几乎不存在**,这段代码不工作。 +- `app.mount("/", ...)` 会拦截所有未匹配的路由,**包括 `/health` 和 `/api/*`**。FastAPI 的注册顺序会把 `app.mount` 放在最末,应该 OK,但建议把静态文件 fallback 用 `html=True` 时显式跳过 `/api` 与 `/health`。 + +### 28. 
README 写"重启后生效"但接口无重启能力 + +- `main.py:282` 写 "配置已保存,重启服务后生效"。 +- 调度间隔是**启动时读取**的(`scheduler.py:97-100`),所以改 `SUMMARIZE_INTERVAL_MINUTES` 真的需要重启。 +- 应当提供 `POST /api/restart` 或在 `apply_db_settings_to_config` 之后重新注册 job。 + +### 29. `models.py:32` `default=list` 是可变默认值陷阱 + +SQLAlchemy 会克隆 default callable,但**仍建议写成 `default=lambda: list()`** 或在 Python 3.11+ 改用不可变 sentinel。 + +### 30. 前端无 TypeScript + +所有 API 调用都没有类型提示,重构后端响应字段前端不会报错。建议至少加 jsdoc 或逐步迁移到 TS。 + +--- + +## 重点修复清单(按 ROI 排序) + +| 优先级 | 修复项 | 估计工时 | 风险等级 | +|--------|--------|----------|----------| +| P0 | 加最小化鉴权(BasicAuth 或 token) | 1h | 高 | +| P0 | 修复去重 `old_groups` 清空范围 | 30min | 高 | +| P0 | CORS 收敛到生产域名 | 10min | 高 | +| P0 | Dockerfile 加 `USER` | 5min | 高 | +| P1 | 修复分页 total 逻辑(后端 + 前端) | 2h | 中 | +| P1 | 加任务运行日志表 | 3h | 中 | +| P1 | 手动 / 定时任务互斥锁 | 1h | 中 | +| P1 | 修复 `compute_duplication_score` 公式 | 15min | 中 | +| P1 | 前端批量保存配置 | 30min | 中 | +| P2 | 引入 Alembic | 4h | 中 | +| P2 | 去重算法优化(桶聚类 / minhash) | 1d | 中 | +| P2 | 统一时区到 UTC | 1h | 低 | +| P2 | LLM 限流 + token 统计 | 4h | 低 | +| P3 | 前端错误边界 + TypeScript | 1d | 低 | + +--- + +## 总评 + +**项目优点**: +- 模块切分清晰(`app/` 下每个职责一个文件)。 +- 关键业务逻辑都有单元测试基础。 +- 配置双层(env + DB)设计合理。 +- 日志、错误信息友好。 +- Docker 部署文档完整。 + +**主要风险**: +- **鉴权 + CORS** 双重缺失 → 任何公网访问都是灾难。 +- **去重任务数据破坏** → 每日 8:00 简报会持续错误。 +- **去重算法性能** → 数据量上来后 O(n²) 不可持续。 + +**建议路径**: +1. **第一步**:修复 P0 安全 / 数据正确性问题(鉴权、CORS、去重 bug、Dockerfile)。 +2. **第二步**:补全可观测性(任务运行日志、token 统计、失败告警)。 +3. **第三步**:性能优化(去重算法、分页、并发锁、LLM 限流)。 +4. 
**持续改进**:迁移到 TypeScript、引入 Alembic、统一时区、补全测试覆盖。 + +--- + +## 附录:文件清单 + +| 文件 | 行数 | 状态 | +|------|------|------| +| `main.py` | 343 | 需修复(CORS、分页响应、锁、Auth) | +| `config.py` | 63 | 可优化(路径创建副作用) | +| `database.py` | 36 | 建议(Alembic 迁移) | +| `models.py` | 104 | 可优化(JSON 默认值、UTC) | +| `scheduler.py` | 151 | 需修复(异常吞掉、时区、互斥) | +| `app/rss_client.py` | 104 | 正常 | +| `app/ai_client.py` | 92 | 建议(限流、重试) | +| `app/taxonomy.py` | 140 | 正常 | +| `app/summarizer.py` | 154 | 可优化(提交边界、重试) | +| `app/tagger.py` | 116 | 正常 | +| `app/scorer.py` | 146 | 需修复(duplication 公式、时区) | +| `app/deduplicator.py` | 216 | 需修复(清空范围、性能) | +| `app/brief.py` | 168 | 可优化(时区、目录嵌套) | +| `app/settings_manager.py` | 185 | 需修复(类型校验失败处理) | +| `tests/conftest.py` | 21 | 正常 | +| `tests/test_deduplicator.py` | 78 | 覆盖不足 | +| `tests/test_scorer.py` | 46 | 覆盖不足 | +| `tests/test_tagger.py` | 43 | 覆盖不足 | +| `Dockerfile` | 27 | 需修复(USER) | +| `docker-compose.yml` | 19 | 正常 | +| `frontend/src/api/index.js` | 47 | 正常 | +| `frontend/src/views/*.vue` | - | 需修复(分页、批量保存、错误边界) | diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..38645d9 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,38 @@ +# Stage 1: 构建前端 +FROM node:20-alpine AS frontend-builder + +ARG NPM_REGISTRY=https://registry.npmmirror.com + +WORKDIR /app/frontend +COPY frontend/package*.json ./ +RUN npm install --registry=${NPM_REGISTRY} +COPY frontend/ . +RUN npm run build + +# Stage 2: Python 后端 +FROM python:3.12-slim + +ARG PIP_INDEX=https://pypi.tuna.tsinghua.edu.cn/simple + +WORKDIR /app + +# 安装构建依赖(部分 Python 包可能需要),并创建非 root 用户 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + && rm -rf /var/lib/apt/lists/* \ + && useradd --create-home --uid 1000 app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt -i ${PIP_INDEX} + +COPY . . 
+COPY --from=frontend-builder /app/frontend/dist ./static + +# 确保数据目录对 app 用户可写 +RUN mkdir -p /app/data && chown -R app:app /app/data + +USER app + +EXPOSE 7331 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7331", "--workers", "1"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..5a7eb23 --- /dev/null +++ b/README.md @@ -0,0 +1,197 @@ +# dataClean + +`dataClean` 是 `rssKeeper` 的下游数据清洗与加工服务,负责: + +- 为无摘要或摘要过短的 RSS 文章生成 **AI 摘要** +- 维护本地 **分类/标签/打分规则表**,初始由 AI 生成,后续按规则执行 +- 对文章自动 **分类、打标签** +- 计算三维度分数:**热度、重要性、多源重复性** +- 基于 URL 和 **内容相似度去重** +- 生成每日简报(**Markdown 文件 + 结构化 JSON/API**) +- 提供 **Web UI** 可视化展示结果并管理配置 + +## 技术栈 + +- 后端:Python 3.12 + FastAPI + SQLAlchemy 2.0 + SQLite + APScheduler +- 前端:Vue 3.4 + Element Plus 2.6 + Vite 5 + Axios +- AI:OpenAI API 兼容客户端 +- 去重:scikit-learn(TF-IDF 相似度) + +## 快速开始 + +### 1. 环境配置 + +复制示例配置并修改: + +```bash +cp .env.example .env +``` + +编辑 `.env`,至少配置: + +```bash +RSSKEEPER_BASE_URL=http://localhost:7329 +OPENAI_API_KEY=sk-xxx +OPENAI_BASE_URL=https://api.openai.com/v1 +OPENAI_MODEL=gpt-4o-mini + +# 生产环境务必设置 API Token,Web UI 右上角可输入该 Token 后调用受保护接口 +API_TOKEN=your-strong-token-here +# CORS 允许来源,逗号分隔;生产环境请填写具体域名 +CORS_ALLOWED_ORIGINS=https://dataclean.example.com +``` + +### 2. Docker 运行(推荐) + +```bash +docker-compose up -d --build +``` + +服务将运行在 `http://localhost:7331`,Web UI 直接通过该地址访问。 + +### 3. 本地开发 + +启动后端: + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +uvicorn main:app --reload --port 7331 +``` + +启动前端(新终端): + +```bash +cd frontend +npm install +npm run dev +``` + +前端开发服务器运行在 `http://localhost:7332`,代理到后端 `http://localhost:7331`。 + +## 核心流程 + +服务启动后: + +1. 自动初始化 SQLite 数据库。 +2. 若 `app_settings` 表为空,使用 `.env` 中的值初始化默认配置。 +3. 若 `taxonomy` 表为空,调用 LLM 生成分类/标签/打分规则(仅一次)。 +4. 
启动定时任务: + - **摘要任务**:每 `SUMMARIZE_INTERVAL_MINUTES` 分钟(默认 60)拉取最近文章并补充 AI 摘要。 + - **分类/打分/去重任务**:每 `TAG_SCORE_INTERVAL_MINUTES` 分钟(默认 1440,即 24 小时)执行。 + - **每日简报**:每天 `08:00` 生成昨日/当日简报。 + +## Web UI 功能 + +| 页面 | 功能 | +|------|------| +| 仪表盘 | 统计卡片、分类分布、最近简报、定时任务下次执行时间 | +| 文章列表 | 搜索、分类/标签筛选、代表文章过滤、分页、综合分排序 | +| 文章详情 | AI 摘要、标签分类、热度/重要性/重复度/综合分、原文链接 | +| 每日简报 | 简报列表、按分类聚合展示、重新生成 | +| 分类体系 | 分类/标签/打分规则查看、手动触发 AI 重新生成 | +| 任务管理 | 手动触发摘要/分类/去重/简报任务 | +| 系统配置 | 查看和修改所有配置项,保存到数据库,重启后生效 | + +## API 接口 + +| 接口 | 说明 | +|------|------| +| `GET /health` | 健康检查 | +| `GET /api/articles` | 查询加工后文章(返回 `{items, total}`) | +| `GET /api/articles/{id}` | 单篇详情 | +| `GET /api/briefs` | 简报列表 | +| `GET /api/briefs/{date}` | 指定日期简报(YYYY-MM-DD) | +| `POST /api/briefs/{date}/regenerate` | 手动重新生成简报(需 Token) | +| `GET /api/taxonomy` | 分类/标签/规则列表 | +| `POST /api/taxonomy/bootstrap?force=true` | 手动触发/重置分类体系(需 Token) | +| `POST /api/tasks/summarize` | 手动触发摘要任务(需 Token,互斥锁) | +| `POST /api/tasks/tag-score-dedup` | 手动触发分类/去重/打分任务(需 Token,互斥锁) | +| `POST /api/tasks/brief` | 手动触发简报生成任务(需 Token,互斥锁) | +| `GET /api/settings` | 获取所有可编辑配置(需 Token) | +| `PUT /api/settings/{key}` | 更新单个配置(需 Token) | +| `PUT /api/settings` | 批量更新配置(需 Token) | +| `POST /api/settings/reset` | 重置为 `.env` 默认值(需 Token) | +| `GET /api/stats` | 仪表盘统计数据 | + +## 目录结构 + +``` +dataClean/ +├── main.py # FastAPI 入口 +├── config.py # 环境变量配置 +├── database.py # SQLite 连接 +├── models.py # SQLAlchemy 模型 +├── scheduler.py # APScheduler 定时任务 +├── Dockerfile # 多阶段构建(含前端) +├── docker-compose.yml +├── requirements.txt +├── .env.example +├── README.md +├── app/ # 后端业务模块 +│ ├── rss_client.py +│ ├── ai_client.py +│ ├── taxonomy.py +│ ├── summarizer.py +│ ├── tagger.py +│ ├── scorer.py +│ ├── deduplicator.py +│ ├── brief.py +│ └── settings_manager.py +├── tests/ # 后端测试 +└── frontend/ # Vue 3 Web UI + ├── package.json + ├── vite.config.js + ├── index.html + └── src/ + ├── main.js + ├── App.vue + ├── router/ + ├── api/ + ├── style.css + └── views/ +``` 
class AIClient:
    """Wrapper around an OpenAI-compatible chat-completions endpoint.

    Constructor arguments left as ``None`` fall back to the matching values
    on ``settings``. The underlying ``OpenAI`` client is built lazily on the
    first access of :attr:`client`.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model: Optional[str] = None,
        timeout: Optional[int] = None,
        max_retries: Optional[int] = None,
    ):
        # For these options an empty/zero value is treated as "not provided".
        self.api_key = api_key or settings.OPENAI_API_KEY
        self.base_url = base_url or settings.OPENAI_BASE_URL
        self.model = model or settings.OPENAI_MODEL
        self.timeout = timeout or settings.OPENAI_TIMEOUT
        # 0 is a meaningful retry count ("do not retry"), so only None may
        # trigger the fallback; `or` would silently replace an explicit 0.
        self.max_retries = (
            settings.OPENAI_MAX_RETRIES if max_retries is None else max_retries
        )

        self._client: Optional[OpenAI] = None

    @property
    def client(self) -> OpenAI:
        """Return the shared ``OpenAI`` client, creating it on first use."""
        if self._client is None:
            self._client = OpenAI(
                api_key=self.api_key,
                base_url=self.base_url,
                timeout=self.timeout,
                max_retries=self.max_retries,
            )
        return self._client

    def chat_completion(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float = 0.3,
        json_mode: bool = False,
    ) -> str:
        """Send one system + user exchange and return the reply text.

        Args:
            system_prompt: Content of the ``system`` message.
            user_prompt: Content of the ``user`` message.
            temperature: Sampling temperature forwarded to the API.
            json_mode: When true, request ``response_format=json_object``.

        Returns:
            The first choice's message content, stripped; ``""`` when the
            content is empty or ``None``.

        Raises:
            APIError: Logged and re-raised when the API call fails.
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

        kwargs = {
            "model": self.model,
            "messages": messages,
            "temperature": temperature,
        }
        if json_mode:
            kwargs["response_format"] = {"type": "json_object"}

        try:
            resp = self.client.chat.completions.create(**kwargs)
            content = resp.choices[0].message.content or ""
            return content.strip()
        except APIError as exc:
            logger.error("LLM API 调用失败: %s", exc)
            raise

    def chat_completion_json(
        self,
        system_prompt: str,
        user_prompt: str,
        temperature: float = 0.3,
    ) -> dict:
        """Call the model in JSON mode and return the parsed reply.

        Raises:
            json.JSONDecodeError: Logged (with the first 500 characters of
                the reply) and re-raised when the reply is not valid JSON.
        """
        content = self.chat_completion(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=temperature,
            json_mode=True,
        )
        try:
            return json.loads(content)
        except json.JSONDecodeError as exc:
            logger.error("LLM 返回不是合法 JSON: %s - content=%s", exc, content[:500])
            raise
def _build_markdown(date_str: str, by_category: Dict[str, List[Dict[str, Any]]], stats: Dict[str, int]) -> str:
    """Render the daily brief as a Markdown document.

    Args:
        date_str: Date shown in the heading.
        by_category: Mapping of category name to the list of article entries
            (dicts with ``title``, ``feed_title``, ``author``, ``tags``,
            the four ``*_score`` keys, ``summary`` and ``link``).
        stats: Must contain ``total_articles`` and ``unique_articles``.

    Returns:
        The Markdown text; categories are emitted in sorted name order.
    """

    def _score(item: Dict[str, Any], key: str) -> str:
        # A score that has not been computed yet may be None; formatting None
        # with ":.1f" raises TypeError, so render it as 0.0 instead.
        value = item.get(key)
        return f"{(0.0 if value is None else value):.1f}"

    lines = [
        f"# RSS 每日简报 ({date_str})",
        "",
        f"- 去重前文章数: {stats['total_articles']}",
        f"- 去重后文章数: {stats['unique_articles']}",
        f"- 生成分类数: {len(by_category)}",
        "",
        "---",
        "",
    ]

    for category, items in sorted(by_category.items(), key=lambda x: x[0]):
        lines.append(f"## {category}")
        lines.append("")
        for item in items:
            tags = " ".join(f"`{t}`" for t in item["tags"]) if item["tags"] else ""
            lines.append(f"### {item['title']}")
            lines.append(f"- 来源: {item['feed_title']} | 作者: {item.get('author') or '未知'}")
            lines.append(f"- 标签: {tags}")
            lines.append(
                f"- 热度: {_score(item, 'heat_score')}"
                f" | 重要性: {_score(item, 'importance_score')}"
                f" | 重复度: {_score(item, 'duplication_score')}"
                f" | 综合: {_score(item, 'composite_score')}"
            )
            if item["summary"]:
                lines.append(f"- 摘要: {item['summary']}")
            if item["link"]:
                lines.append(f"- [阅读原文]({item['link']})")
            # Blank line separates consecutive article entries.
            lines.append("")

    return "\n".join(lines)
date_str 为空则处理今天。 + 返回简报数据字典。 + """ + if date_str is None: + date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d") + + # 检查是否已存在 + existing = db.query(DailyBrief).filter(DailyBrief.brief_date == date_str).first() + if existing and not force: + logger.info("日期 %s 简报已存在,跳过生成", date_str) + return { + "date": date_str, + "total_articles": existing.total_articles, + "unique_articles": existing.unique_articles, + "markdown_path": existing.markdown_path, + } + + day_start = datetime.strptime(date_str, "%Y-%m-%d") + day_end = day_start + timedelta(days=1) + + # 取当天去重后的代表文章 + query = ( + db.query(EnrichedArticle) + .filter( + EnrichedArticle.fetched_at >= day_start, + EnrichedArticle.fetched_at < day_end, + ) + ) + + # 默认只取代表文章或未归入重复组的文章 + representative_articles = ( + query.filter( + (EnrichedArticle.is_representative == True) + | (EnrichedArticle.duplicate_group_id == None) + ) + .order_by(EnrichedArticle.composite_score.desc()) + .all() + ) + + # 按分类分组并排序 + by_category: Dict[str, List[Dict[str, Any]]] = {} + for art in representative_articles: + cat = art.category or "未分类" + if cat not in by_category: + by_category[cat] = [] + by_category[cat].append(_format_article(art)) + + # 每个分类只保留 TOP N + top_n = settings.BRIEF_TOP_N_PER_CATEGORY + for cat in by_category: + by_category[cat] = by_category[cat][:top_n] + + total_before_dedup = query.count() + unique_count = sum(len(items) for items in by_category.values()) + + stats = { + "total_articles": total_before_dedup, + "unique_articles": unique_count, + } + + # 生成 Markdown 文件 + output_dir = settings.brief_output_dir_path / date_str + output_dir.mkdir(parents=True, exist_ok=True) + markdown_path = output_dir / "daily-brief.md" + markdown_content = _build_markdown(date_str, by_category, stats) + markdown_path.write_text(markdown_content, encoding="utf-8") + + # 更新文章 brief_date + for art in representative_articles: + art.brief_date = date_str + + # 保存到数据库 + brief_data = { + "date": date_str, + "total_articles": 
# Characters that separate title tokens: anything that is neither a word
# character nor a CJK ideograph in the listed ranges, plus "_" (which \w would
# otherwise keep even though it is punctuation). Compiled once at import time.
_TITLE_SEPARATOR_RE = re.compile(
    r"(?:[^\w一-鿿㐀-䶿\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f]|_)+"
)


def _normalize_title(title: str) -> str:
    """Normalize a title for similarity comparison.

    Punctuation (including underscores) becomes whitespace, runs of
    whitespace collapse to a single space, and the result is lower-cased.
    Letters, digits and CJK ideographs are kept.

    Args:
        title: Raw title; ``None`` or ``""`` is accepted.

    Returns:
        The normalized title, or ``""`` for empty input.
    """
    if not title:
        return ""
    spaced = _TITLE_SEPARATOR_RE.sub(" ", title)
    # split()/join trims the ends and collapses any remaining whitespace runs.
    return " ".join(spaced.split()).lower()
def _pick_representative(articles: List["EnrichedArticle"], indices: Set[int]) -> "EnrichedArticle":
    """Choose the representative article of one duplicate cluster.

    Preference order: has an AI summary, then has a feed category, then the
    earliest ``published_at``. Articles without a publish time rank after
    dated ones, and remaining ties go to the lowest index so the result does
    not depend on set iteration order.

    Args:
        articles: The article list the cluster indices refer to.
        indices: Indices into ``articles`` forming one cluster.

    Returns:
        The chosen article from ``articles``.

    Raises:
        IndexError: If ``indices`` is empty.
    """
    # Placeholder for missing dates; it is only compared against another
    # placeholder because the "is None" flag precedes it in the sort key.
    far_future = datetime.max.replace(tzinfo=timezone.utc)

    def _rank(idx: int):
        art = articles[idx]
        published = art.published_at
        # Naive timestamps are treated as UTC so naive and aware values can be
        # compared without raising TypeError.
        if published is not None and published.tzinfo is None:
            published = published.replace(tzinfo=timezone.utc)
        return (
            not art.ai_summary,       # False sorts first: AI summary preferred
            not art.feed_category,    # then a known feed category
            published is None,        # dated articles before undated ones
            published or far_future,  # earliest publish time wins
            idx,                      # deterministic final tie-break
        )

    ranked = sorted(indices, key=_rank)
    return articles[ranked[0]]
DuplicateGroup( + representative_article_id=representative.id, + member_article_ids=member_ids, + similarity_matrix={}, # 可后续补充 + brief_date=date_str, + ) + db.add(group) + db.flush() + + for idx in cluster: + art = unique_articles[idx] + art.duplicate_group_id = group.id + art.is_representative = (art.id == representative.id) + + stats["representatives"] += 1 + + db.commit() + logger.info( + "去重完成: 日期=%s, 总文章=%d, 重复组=%d, URL 重复=%d", + date_str, stats["total"], stats["duplicate_groups"], url_dup_count + ) + return stats diff --git a/app/rss_client.py b/app/rss_client.py new file mode 100644 index 0000000..d0e68b5 --- /dev/null +++ b/app/rss_client.py @@ -0,0 +1,104 @@ +"""调用 rssKeeper 外部 API""" +from datetime import datetime, timedelta +from typing import List, Optional, Dict, Any +import logging + +import requests + +from config import settings + +logger = logging.getLogger(__name__) + + +class RSSKeeperClient: + """rssKeeper 外部 API 客户端""" + + def __init__(self, base_url: Optional[str] = None, timeout: int = 30): + self.base_url = (base_url or settings.RSSKEEPER_BASE_URL).rstrip("/") + self.timeout = timeout + + def _get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + url = f"{self.base_url}{path}" + try: + resp = requests.get(url, params=params, timeout=self.timeout) + resp.raise_for_status() + return resp.json() + except requests.RequestException as exc: + logger.error("请求 rssKeeper 失败: %s - %s", url, exc) + raise + + def fetch_recent( + self, + hours: int = 24, + limit: int = 200, + feed_id: Optional[int] = None, + category: Optional[str] = None, + search: Optional[str] = None, + unread_only: bool = False, + ) -> List[Dict[str, Any]]: + """获取最近 N 小时的文章""" + params = { + "hours": hours, + "limit": limit, + "unread_only": unread_only, + } + if feed_id is not None: + params["feed_id"] = feed_id + if category is not None: + params["category"] = category + if search is not None: + params["search"] = search + + data = 
class RSSKeeperClient:
    """Thin HTTP client for the rssKeeper external (read-only) API."""

    def __init__(self, base_url: Optional[str] = None, timeout: int = 30):
        """
        Args:
            base_url: Service root; falls back to ``settings.RSSKEEPER_BASE_URL``.
                A trailing slash is stripped so paths can be appended directly.
            timeout: Per-request timeout passed to ``requests.get``.
        """
        self.base_url = (base_url or settings.RSSKEEPER_BASE_URL).rstrip("/")
        self.timeout = timeout

    def _get(self, path: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Issue a GET request and return the decoded JSON body.

        Raises:
            requests.RequestException: Logged and re-raised on any request
                failure, including non-2xx responses (``raise_for_status``).
        """
        url = f"{self.base_url}{path}"
        try:
            resp = requests.get(url, params=params, timeout=self.timeout)
            resp.raise_for_status()
            return resp.json()
        except requests.RequestException as exc:
            logger.error("请求 rssKeeper 失败: %s - %s", url, exc)
            raise

    def fetch_recent(
        self,
        hours: int = 24,
        limit: int = 200,
        feed_id: Optional[int] = None,
        category: Optional[str] = None,
        search: Optional[str] = None,
        unread_only: bool = False,
    ) -> List[Dict[str, Any]]:
        """Return the ``articles`` list for the last ``hours`` hours.

        Optional filters are only sent when they are not ``None``; a response
        without an ``articles`` key yields an empty list.
        """
        params = {
            "hours": hours,
            "limit": limit,
            "unread_only": unread_only,
        }
        if feed_id is not None:
            params["feed_id"] = feed_id
        if category is not None:
            params["category"] = category
        if search is not None:
            params["search"] = search

        data = self._get("/api/v1/external/recent", params=params)
        return data.get("articles", [])

    def fetch_by_date(self, date: str, category: Optional[str] = None) -> Dict[str, Any]:
        """Return the raw summary payload for ``date``, optionally per category."""
        params: Dict[str, Any] = {"date": date}
        if category is not None:
            params["category"] = category
        return self._get("/api/v1/external/summary", params=params)

    def fetch_feeds(
        self,
        health_status: Optional[str] = None,
        category: Optional[str] = None,
        error_type: Optional[str] = None,
        is_active: Optional[bool] = True,
    ) -> List[Dict[str, Any]]:
        """Return the ``feeds`` list.

        Every filter is omitted from the request when it is ``None``; note that
        ``is_active`` defaults to ``True``, so pass ``None`` to drop that filter.
        """
        params: Dict[str, Any] = {}
        if health_status is not None:
            params["health_status"] = health_status
        if category is not None:
            params["category"] = category
        if error_type is not None:
            params["error_type"] = error_type
        if is_active is not None:
            params["is_active"] = is_active

        data = self._get("/api/v1/external/feeds", params=params)
        return data.get("feeds", [])

    def fulltext_search(
        self,
        q: str,
        limit: int = 50,
        offset: int = 0,
        category: Optional[str] = None,
        feed_id: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Run a full-text search and return the raw response payload."""
        params: Dict[str, Any] = {
            "q": q,
            "limit": limit,
            "offset": offset,
        }
        if category is not None:
            params["category"] = category
        if feed_id is not None:
            params["feed_id"] = feed_id
        return self._get("/api/v1/external/search", params=params)


# Shared module-level client; reads settings.RSSKEEPER_BASE_URL at import time.
rss_client = RSSKeeperClient()
logger = logging.getLogger(__name__)


# Composite score weights: heat 30%, importance 50%, duplication 20%.
COMPOSITE_WEIGHT_HEAT = 0.3
COMPOSITE_WEIGHT_IMPORTANCE = 0.5
COMPOSITE_WEIGHT_DUPLICATION = 0.2


def _build_text(article: EnrichedArticle) -> str:
    """Join title, summary (AI summary preferred) and content for matching."""
    return " ".join([
        article.title or "",
        article.ai_summary or article.original_summary or "",
        article.content or "",
    ])


def _score_by_rules(article: EnrichedArticle, rules: List[Taxonomy]) -> float:
    """
    Score an article by keyword hits against weighted rules.

    Each rule contributes ``min(hits, 5) * weight * 10``; the sum is capped at
    100. Returns 0.0 when the article has no text or there are no rules.
    """
    text = _build_text(article)
    if not text.strip() or not rules:
        return 0.0

    score = 0.0
    for rule in rules:
        hits = _count_matches(text, rule.keywords or [])
        if hits > 0:
            # A rule stored without a weight counts with the neutral weight 1.0
            # instead of raising TypeError on None * int.
            weight = rule.weight if rule.weight is not None else 1.0
            score += min(hits, 5) * weight * 10

    return min(score, 100.0)


def _freshness_score(article: EnrichedArticle) -> float:
    """
    Freshness bonus derived from ``published_at``.

    20 points up to 24 hours old, falling linearly to 0 at 72 hours; 0 when
    the article has no publication time.
    """
    now = datetime.now(timezone.utc)
    published = article.published_at
    if not published:
        return 0.0

    # Values read back from the database may be naive; treat them as UTC.
    if published.tzinfo is None:
        published = published.replace(tzinfo=timezone.utc)

    # Timestamps in the future count as "just published".
    hours_old = max((now - published).total_seconds() / 3600, 0)

    if hours_old <= 24:
        return 20.0
    if hours_old >= 72:
        return 0.0
    return 20.0 * (1 - (hours_old - 24) / 48)


def compute_heat_score(article: EnrichedArticle, heat_rules: List[Taxonomy]) -> float:
    """Heat score: keyword hits plus freshness bonus, capped at 100."""
    base = _score_by_rules(article, heat_rules)
    fresh = _freshness_score(article)
    return min(base + fresh, 100.0)


def compute_importance_score(article: EnrichedArticle, importance_rules: List[Taxonomy]) -> float:
    """Importance score: keyword hits only."""
    return _score_by_rules(article, importance_rules)


def compute_duplication_score(duplicate_count: int, max_count: int = 5) -> float:
    """
    Duplication score: grows linearly with the number of occurrences.

    A count of 1 (or less) scores 0, a count of ``max_count`` or more scores
    100. With ``max_count <= 1`` there is no range to interpolate over, so any
    count above 1 scores 100 (previously this raised ZeroDivisionError).
    """
    if duplicate_count <= 1:
        return 0.0
    if max_count <= 1:
        return 100.0
    score = (duplicate_count - 1) / (max_count - 1) * 100.0
    return min(score, 100.0)


def compute_composite_score(heat: float, importance: float, duplication: float) -> float:
    """Weighted sum of the three scores, rounded to two decimals."""
    return round(
        heat * COMPOSITE_WEIGHT_HEAT
        + importance * COMPOSITE_WEIGHT_IMPORTANCE
        + duplication * COMPOSITE_WEIGHT_DUPLICATION,
        2,
    )


def score_articles(
    db: Session,
    article_ids: List[int] = None,
    update_duplication: bool = False,
) -> int:
    """
    Compute heat, importance and composite scores for articles.

    Args:
        db: Open SQLAlchemy session; commits every 50 articles and at the end.
        article_ids: Restrict scoring to these ids. ``None`` or an empty list
            scores every article.
        update_duplication: Also refresh ``duplication_score`` from the
            article's duplicate group.

    Returns:
        Number of articles processed.
    """
    heat_rules = db.query(Taxonomy).filter(Taxonomy.kind == "heat_rule").all()
    importance_rules = db.query(Taxonomy).filter(Taxonomy.kind == "importance_rule").all()

    query = db.query(EnrichedArticle)
    if article_ids:
        query = query.filter(EnrichedArticle.id.in_(article_ids))

    count = 0
    for article in query.all():
        article.heat_score = compute_heat_score(article, heat_rules)
        article.importance_score = compute_importance_score(article, importance_rules)

        if update_duplication:
            dup_count = 0
            if article.duplicate_group_id:
                group = article.duplicate_group
                if group and group.member_article_ids:
                    # Number of non-representative members in the group.
                    # NOTE(review): compute_duplication_score already maps a
                    # count of 1 to 0, so a two-member group scores 0 here —
                    # confirm whether the group size should be passed instead.
                    dup_count = max(len(group.member_article_ids) - 1, 0)
            article.duplication_score = compute_duplication_score(dup_count)

        article.composite_score = compute_composite_score(
            article.heat_score,
            article.importance_score,
            # A never-computed duplication score may still be NULL.
            article.duplication_score or 0.0,
        )
        count += 1
        if count % 50 == 0:
            db.commit()

    db.commit()
    logger.info("打分完成: %d 篇文章", count)
    return count
logger = logging.getLogger(__name__)


# Settings that may be edited from the Web UI.
EDITABLE_SETTINGS = {
    "RSSKEEPER_BASE_URL": {"description": "rssKeeper 服务地址", "sensitive": False},
    "OPENAI_API_KEY": {"description": "LLM API Key", "sensitive": True},
    "OPENAI_BASE_URL": {"description": "LLM API 基础地址", "sensitive": False},
    "OPENAI_MODEL": {"description": "LLM 模型名", "sensitive": False},
    "OPENAI_TIMEOUT": {"description": "LLM 调用超时(秒)", "sensitive": False},
    "OPENAI_MAX_RETRIES": {"description": "LLM 调用最大重试次数", "sensitive": False},
    "SUMMARIZE_INTERVAL_MINUTES": {"description": "摘要任务间隔(分钟)", "sensitive": False},
    "TAG_SCORE_INTERVAL_MINUTES": {"description": "分类/打分/去重任务间隔(分钟)", "sensitive": False},
    "DAILY_BRIEF_HOUR": {"description": "每日简报生成小时", "sensitive": False},
    "DAILY_BRIEF_MINUTE": {"description": "每日简报生成分钟", "sensitive": False},
    "TITLE_SIMILARITY_THRESHOLD": {"description": "标题相似度阈值", "sensitive": False},
    "CONTENT_SIMILARITY_THRESHOLD": {"description": "内容相似度阈值", "sensitive": False},
    "MAX_AI_SUMMARY_LENGTH": {"description": "AI 摘要最大长度", "sensitive": False},
    "MIN_ORIGINAL_SUMMARY_LENGTH": {"description": "原始摘要最小长度", "sensitive": False},
    "BRIEF_TOP_N_PER_CATEGORY": {"description": "简报每分类显示文章数", "sensitive": False},
    "LOG_LEVEL": {"description": "日志级别", "sensitive": False},
    "API_TOKEN": {"description": "API 鉴权 Token(为空时不启用鉴权)", "sensitive": True},
    "CORS_ALLOWED_ORIGINS": {"description": "CORS 允许来源(逗号分隔)", "sensitive": False},
}

# Secrets shorter than this are masked completely: showing 4 + 4 characters
# only makes sense when at least half of the value stays hidden.
_MASK_MIN_LENGTH = 16


def _get_env_default(key: str) -> str:
    """Return the value of ``key`` on the global settings object as a string."""
    value = getattr(settings, key, "")
    return str(value) if value is not None else ""


def _mask_sensitive(value: str) -> str:
    """
    Mask a secret for display.

    Empty values stay empty, values shorter than ``_MASK_MIN_LENGTH`` become
    all asterisks, longer values keep their first and last four characters.
    (The previous cut-off of 8 revealed 8 of 9 characters of a short secret.)
    """
    if not value:
        return ""
    if len(value) < _MASK_MIN_LENGTH:
        return "*" * len(value)
    return value[:4] + "..." + value[-4:]


def _coerce_setting_value(raw: str, target_type: Any) -> Any:
    """
    Convert a stored string to the type annotated on the settings field.

    Raises:
        ValueError: If ``raw`` cannot be parsed as ``int``/``float``.
    """
    if target_type is int:
        return int(raw)
    if target_type is float:
        return float(raw)
    if target_type is bool:
        return raw.lower() in ("true", "1", "yes")
    if target_type is Path:
        return Path(raw)
    return raw


def init_default_settings(db: Session) -> None:
    """Seed the settings table from the environment defaults if it is empty."""
    existing_count = db.query(AppSetting).count()
    if existing_count > 0:
        return

    for key, meta in EDITABLE_SETTINGS.items():
        db.add(
            AppSetting(
                key=key,
                value=_get_env_default(key),
                description=meta["description"],
                is_sensitive=meta["sensitive"],
            )
        )

    db.commit()
    logger.info("已初始化默认配置项: %d 条", len(EDITABLE_SETTINGS))


def get_setting(db: Session, key: str, default: Any = None) -> Any:
    """
    Read one setting from the database.

    When no row exists, ``default`` is returned if given, otherwise the
    environment default for ``key``.
    """
    setting = db.query(AppSetting).filter(AppSetting.key == key).first()
    if setting:
        return setting.value
    return _get_env_default(key) if default is None else default


def get_setting_value(key: str, default: Any = None) -> Any:
    """Like ``get_setting`` but opens (and closes) a temporary session."""
    from database import SessionLocal
    db = SessionLocal()
    try:
        return get_setting(db, key, default)
    finally:
        db.close()


def set_setting(db: Session, key: str, value: str) -> bool:
    """
    Create or update one editable setting and commit.

    Returns:
        ``False`` when ``key`` is not in ``EDITABLE_SETTINGS`` (nothing is
        written), ``True`` otherwise.
    """
    if key not in EDITABLE_SETTINGS:
        return False

    setting = db.query(AppSetting).filter(AppSetting.key == key).first()
    if setting:
        setting.value = str(value)
        setting.updated_at = datetime.now(timezone.utc)
    else:
        meta = EDITABLE_SETTINGS[key]
        db.add(
            AppSetting(
                key=key,
                value=str(value),
                description=meta["description"],
                is_sensitive=meta["sensitive"],
            )
        )

    db.commit()
    logger.info("配置已更新: %s", key)
    return True


def list_settings(db: Session, mask_sensitive: bool = True) -> List[Dict[str, Any]]:
    """
    List every editable setting with its current value.

    Args:
        db: Open SQLAlchemy session.
        mask_sensitive: Mask sensitive values and omit ``real_value``.

    Returns:
        One dict per entry of ``EDITABLE_SETTINGS``; settings without a row in
        the database report their environment default.
    """
    db_settings = {s.key: s for s in db.query(AppSetting).all()}
    result = []

    for key, meta in EDITABLE_SETTINGS.items():
        setting = db_settings.get(key)
        value = setting.value if setting else _get_env_default(key)
        is_sensitive = meta["sensitive"]
        is_masked = bool(is_sensitive and mask_sensitive)
        display_value = _mask_sensitive(value) if is_masked else value

        # A row may exist without ever having been updated.
        updated_at = setting.updated_at if setting else None

        result.append({
            "key": key,
            "value": display_value,
            "real_value": value if not mask_sensitive else None,
            "description": meta["description"],
            "is_sensitive": is_sensitive,
            "is_masked": is_masked,
            "updated_at": updated_at.isoformat() if updated_at else None,
        })

    return result


def reset_settings(db: Session) -> None:
    """Reset every editable setting to its environment default."""
    for key in EDITABLE_SETTINGS:
        set_setting(db, key, _get_env_default(key))
    logger.info("配置已重置为环境变量默认值")


def apply_db_settings_to_config(db: Session = None) -> None:
    """
    Copy the database settings onto the global ``settings`` object.

    Empty values and keys that are not fields of the settings class are
    skipped. Sensitive values are masked in log output and error messages.

    Args:
        db: Session to read from; a temporary one is opened when omitted.

    Raises:
        ValueError: If a stored value cannot be converted to the field type.
    """
    close_db = False
    if db is None:
        from database import SessionLocal
        db = SessionLocal()
        close_db = True
    try:
        # Read the field map from the class: pydantic deprecates accessing
        # model_fields on an instance.
        model_fields = type(settings).model_fields
        for key, meta in EDITABLE_SETTINGS.items():
            db_value = get_setting(db, key)
            if db_value is None or db_value == "":
                continue
            field_info = model_fields.get(key)
            if field_info is None:
                continue
            # Never write API keys or tokens to the log in clear text.
            shown = _mask_sensitive(str(db_value)) if meta["sensitive"] else db_value
            try:
                converted = _coerce_setting_value(db_value, field_info.annotation)
                setattr(settings, key, converted)
                logger.debug("已应用配置: %s=%s", key, shown)
            except Exception as exc:
                logger.error("应用配置 %s=%s 失败: %s", key, shown, exc)
                raise ValueError(f"配置项 {key} 的值无效: {shown}") from exc
    finally:
        if close_db:
            db.close()
logger = logging.getLogger(__name__)


SUMMARY_SYSTEM_PROMPT = """你是一位擅长阅读 RSS 新闻并提炼摘要的助手。
请用简洁流畅的中文总结文章核心内容,要求:
1. 长度控制在 {max_length} 个汉字以内。
2. 包含文章最重要的 1-3 个要点。
3. 不要添加个人评价,不要复述原文标题。
4. 若原文是英文,请用中文输出摘要。
"""


SUMMARY_USER_PROMPT_TEMPLATE = """请为以下文章生成摘要。

标题:{title}
作者:{author}
来源:{feed_title}

正文:
{content}
"""


def _needs_summary(article: EnrichedArticle, min_original_length: int = None) -> bool:
    """
    Decide whether an AI summary should be generated for ``article``.

    An article needs one only when it has no AI summary yet AND its original
    summary is missing or shorter than the minimum length. (Previously an
    article with a short original summary was re-summarised on every run even
    though it already had an AI summary.)

    Args:
        article: Object exposing ``ai_summary`` and ``original_summary``.
        min_original_length: Minimum acceptable original summary length;
            defaults to ``settings.MIN_ORIGINAL_SUMMARY_LENGTH``.
    """
    if article.ai_summary:
        return False
    if min_original_length is None:
        min_original_length = settings.MIN_ORIGINAL_SUMMARY_LENGTH
    original = article.original_summary or ""
    return len(original.strip()) < min_original_length


def _prepare_content(raw_content: str, max_chars: int = 8000) -> str:
    """Collapse whitespace and truncate so the text fits the LLM context."""
    text = " ".join((raw_content or "").split())
    return text[:max_chars]


def _generate_summary(article: EnrichedArticle) -> str:
    """
    Ask the LLM for a summary of one article.

    Returns:
        The summary truncated to ``settings.MAX_AI_SUMMARY_LENGTH``, or an
        empty string when the call fails or returns nothing.
    """
    content = _prepare_content(article.content or article.original_summary or "")
    if not content.strip():
        # Nothing but the title is available to summarise.
        content = article.title or ""

    system_prompt = SUMMARY_SYSTEM_PROMPT.format(max_length=settings.MAX_AI_SUMMARY_LENGTH)
    user_prompt = SUMMARY_USER_PROMPT_TEMPLATE.format(
        title=article.title or "",
        author=article.author or "",
        feed_title=article.feed_title or "",
        content=content,
    )

    try:
        summary = ai_client.chat_completion(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=0.3,
        )
    except Exception as exc:
        # %s rather than %d: the upstream id is not guaranteed to be an int,
        # and a formatting error here would hide the real failure.
        logger.error("生成 article_id=%s 摘要失败: %s", article.rk_article_id, exc)
        return ""
    return (summary or "")[: settings.MAX_AI_SUMMARY_LENGTH]


def _article_from_rss(raw: Dict[str, Any]) -> Dict[str, Any]:
    """
    Map an rssKeeper article payload to ``EnrichedArticle`` column values.

    ``published_at`` strings are parsed as ISO 8601 (a trailing ``Z`` is
    accepted); unparseable strings become ``None``.

    Raises:
        KeyError: If the payload has no ``id``.
    """
    published_at = raw.get("published_at")
    if isinstance(published_at, str):
        try:
            published_at = datetime.fromisoformat(published_at.replace("Z", "+00:00"))
        except ValueError:
            published_at = None

    return {
        "rk_article_id": raw["id"],
        "title": raw.get("title", "") or "",
        "link": raw.get("link", "") or "",
        "feed_id": raw.get("feed_id", 0),
        "feed_title": raw.get("feed_title", "") or "",
        "feed_category": raw.get("category", "") or "",
        "author": raw.get("author", "") or "",
        "published_at": published_at,
        "original_summary": raw.get("summary", "") or "",
        "content": raw.get("content", "") or "",
    }


def fetch_and_summarize(db: Session, hours: int = 24, limit: int = 200) -> Dict[str, int]:
    """
    Pull recent articles from rssKeeper, upsert them and add AI summaries.

    Args:
        db: Open SQLAlchemy session; commits every 10 processed articles and
            once at the end.
        hours: Look-back window passed to rssKeeper.
        limit: Maximum number of articles to request.

    Returns:
        ``{"fetched": x, "created": y, "summarized": z}``.
    """
    articles = rss_client.fetch_recent(hours=hours, limit=limit)
    if not articles:
        logger.info("未拉取到新文章")
        return {"fetched": 0, "created": 0, "summarized": 0}

    stats = {"fetched": len(articles), "created": 0, "summarized": 0}

    for processed, raw in enumerate(articles, 1):
        if raw.get("id") is None:
            # One malformed entry must not abort the whole batch.
            logger.warning("跳过缺少 id 的文章: %s", raw.get("title"))
            continue

        data = _article_from_rss(raw)
        article = db.query(EnrichedArticle).filter(
            EnrichedArticle.rk_article_id == data["rk_article_id"]
        ).first()

        if article is None:
            article = EnrichedArticle(**data)
            db.add(article)
            db.flush()
            stats["created"] += 1
        else:
            # Refresh the basic fields, keeping old values where the feed
            # now returns nothing.
            article.title = data["title"] or article.title
            article.link = data["link"] or article.link
            article.feed_title = data["feed_title"] or article.feed_title
            article.feed_category = data["feed_category"] or article.feed_category
            article.author = data["author"] or article.author
            article.published_at = data["published_at"] or article.published_at
            article.original_summary = data["original_summary"] or article.original_summary
            article.content = data["content"] or article.content
            # NOTE(review): this moves a re-fetched article into today's
            # fetched_at window — confirm that is intended.
            article.fetched_at = datetime.now(timezone.utc)

        if _needs_summary(article):
            ai_summary = _generate_summary(article)
            if ai_summary:
                article.ai_summary = ai_summary
                stats["summarized"] += 1

        # Commit every 10 processed articles to keep transactions short. The
        # counter must be the loop position: keying on stats["summarized"]
        # committed on every iteration while that count stayed at 0.
        if processed % 10 == 0:
            db.commit()

    db.commit()
    logger.info(
        "摘要任务完成: fetched=%d, created=%d, summarized=%d",
        stats["fetched"], stats["created"], stats["summarized"]
    )
    return stats
logger = logging.getLogger(__name__)


def _normalize(text: str) -> str:
    """Collapse whitespace and lowercase ``text`` for keyword matching."""
    if not text:
        return ""
    return " ".join(text.split()).lower()


def _count_matches(text: str, keywords: List[str]) -> int:
    """
    Count case-insensitive substring occurrences of ``keywords`` in ``text``.

    Keywords that are empty after normalisation are ignored: ``str.count("")``
    returns ``len(text) + 1``, so a whitespace-only keyword used to match every
    article. Non-string keywords (e.g. numbers from LLM JSON) are matched by
    their string form.
    """
    if not text or not keywords:
        return 0
    text_norm = _normalize(text)
    count = 0
    for kw in keywords:
        if not kw:
            continue
        kw_norm = _normalize(kw if isinstance(kw, str) else str(kw))
        if not kw_norm:
            continue
        # Plain substring matching; works for Chinese keywords as well.
        count += text_norm.count(kw_norm)
    return count


def _article_text(article: EnrichedArticle) -> str:
    """Join title, summary (AI summary preferred) and content for matching."""
    return " ".join([
        article.title or "",
        article.ai_summary or article.original_summary or "",
        article.content or "",
    ])


def classify_article(article: EnrichedArticle, categories: List[Taxonomy]) -> str:
    """
    Pick the best matching category name for an article.

    The score of a category is its keyword hit count, plus 2 when the
    category name equals the article's feed category. The first category with
    the highest positive score wins; without any hit the feed category is
    used, and "未分类" when that is empty too.
    """
    text = _article_text(article)

    best_category = ""
    best_score = 0

    for cat in categories:
        score = _count_matches(text, cat.keywords or [])
        # Small bonus when the article's feed is already filed under this category.
        if article.feed_category and article.feed_category == cat.name:
            score += 2
        if score > best_score:
            best_score = score
            best_category = cat.name

    return best_category or article.feed_category or "未分类"


def tag_article(article: EnrichedArticle, tags: List[Taxonomy]) -> List[str]:
    """Return the names of all tags with at least one keyword hit, in order, without repeats."""
    text = _article_text(article)
    matched = [
        tag.name
        for tag in tags
        if _count_matches(text, tag.keywords or []) > 0
    ]
    # dict.fromkeys removes repeated names while keeping first-seen order.
    return list(dict.fromkeys(matched))


def tag_articles(db: Session, article_ids: List[int] = None) -> int:
    """
    Classify and tag articles.

    Args:
        db: Open SQLAlchemy session; commits every 50 articles and at the end.
        article_ids: Process exactly these articles. When ``None`` or empty,
            only articles whose category is empty or NULL are processed.

    Returns:
        Number of articles processed (0 when no categories are defined).
    """
    categories = db.query(Taxonomy).filter(Taxonomy.kind == "category").all()
    tags = db.query(Taxonomy).filter(Taxonomy.kind == "tag").all()

    if not categories:
        logger.warning("taxonomy 中无 category 数据,跳过分类")
        return 0

    query = db.query(EnrichedArticle)
    if article_ids:
        query = query.filter(EnrichedArticle.id.in_(article_ids))
    else:
        query = query.filter(
            (EnrichedArticle.category == "") | (EnrichedArticle.category.is_(None))
        )

    count = 0
    for article in query.all():
        article.category = classify_article(article, categories)
        article.tags = tag_article(article, tags)
        count += 1
        if count % 50 == 0:
            db.commit()

    db.commit()
    logger.info("分类/打标签完成: %d 篇文章", count)
    return count
logger = logging.getLogger(__name__)


TAXONOMY_SYSTEM_PROMPT = """你是一位专业的信息分类与内容分析专家。
请根据用户提供的 RSS 文章样本,生成一套适合的中文内容分类体系、标签体系和打分规则。

输出必须是合法的 JSON,格式如下:
{
  "categories": [
    {"name": "科技", "description": "人工智能、芯片、互联网、软件等", "keywords": ["AI", "芯片", "大模型", ...]}
  ],
  "tags": [
    {"name": "人工智能", "description": "...", "keywords": ["AI", "人工智能", "大模型", ...]}
  ],
  "heat_rules": [
    {"name": "热点事件", "keywords": ["突发", "重磅", "刚刚", "发布"], "weight": 1.5}
  ],
  "importance_rules": [
    {"name": "政策法规", "keywords": ["政策", "监管", "法规", "征求意见"], "weight": 1.5}
  ],
  "duplication_indicators": [
    {"name": "同一事件", "keywords": ["宣布", "发布", "推出"], "weight": 1.0}
  ]
}

要求:
1. categories 数量控制在 8-12 个,覆盖科技、财经、新闻、设计、生活等常见 RSS 主题。
2. tags 数量控制在 30-50 个,尽量细化但避免过度重叠。
3. heat_rules 和 importance_rules 各 10-20 条,weight 范围 0.5-2.0。
4. 所有 keywords 用中文或中英双语,便于后续关键词匹配。
5. 不要输出任何解释文字,只输出 JSON。
"""

# (Taxonomy.kind, key in the LLM response) in the order rows are written.
_TAXONOMY_SECTIONS = (
    ("category", "categories"),
    ("tag", "tags"),
    ("heat_rule", "heat_rules"),
    ("importance_rule", "importance_rules"),
    ("duplication_rule", "duplication_indicators"),
)


def _build_sample_prompt(articles: List[Dict[str, Any]]) -> str:
    """
    Render up to 50 sample articles as the user prompt.

    Missing or ``None`` fields are rendered as empty text (a ``None`` content
    used to raise TypeError when sliced).
    """
    lines = [f"共有 {len(articles)} 篇文章样本:"]
    for idx, art in enumerate(articles[:50], 1):
        title = art.get("title") or ""
        summary = art.get("summary") or (art.get("content") or "")[:300]
        feed = art.get("feed_title") or ""
        cat = art.get("category") or ""
        lines.append(f"\n[{idx}] 标题:{title}")
        lines.append(f" 来源:{feed} | 源分类:{cat}")
        lines.append(f" 摘要:{summary[:400]}")
    return "\n".join(lines)


def _collect_taxonomy_rows(data: Any, default_weight: float = 1.0) -> List[Dict[str, Any]]:
    """
    Validate an LLM taxonomy response and flatten it into row dicts.

    The response is untrusted: non-dict payloads, non-list sections, non-dict
    items and items without a name are dropped; a single string keyword is
    wrapped in a list; an unparseable weight falls back to ``default_weight``.

    Returns:
        Dicts with the keys ``name``, ``kind``, ``description``, ``keywords``
        and ``weight``.
    """
    if not isinstance(data, dict):
        return []

    rows: List[Dict[str, Any]] = []
    for kind, section in _TAXONOMY_SECTIONS:
        items = data.get(section)
        if not isinstance(items, list):
            continue
        for item in items:
            if not isinstance(item, dict):
                continue
            name = str(item.get("name") or "").strip()
            if not name:
                continue
            keywords = item.get("keywords", [])
            if isinstance(keywords, str):
                keywords = [keywords]
            elif not isinstance(keywords, list):
                keywords = []
            try:
                weight = float(item.get("weight", default_weight))
            except (TypeError, ValueError):
                weight = default_weight
            rows.append({
                "name": name,
                "kind": kind,
                "description": item.get("description", "") or "",
                "keywords": keywords,
                "weight": weight,
            })
    return rows


def bootstrap_taxonomy(db: Session, force: bool = False) -> bool:
    """
    Generate the category / tag / scoring rule set with the LLM.

    Args:
        db: Open SQLAlchemy session.
        force: Rebuild even when rules already exist. The old rules are only
            removed once a usable replacement has been generated, and removal
            and insert are committed together, so a failed fetch or LLM call
            leaves the existing taxonomy untouched.

    Returns:
        ``True`` when new rules were written, ``False`` when initialisation
        was skipped or failed.
    """
    existing = db.query(Taxonomy).first()
    if existing and not force:
        logger.info("taxonomy 表已存在,跳过初始化")
        return False

    logger.info("开始从 rssKeeper 拉取样本文章并生成分类体系...")
    articles = rss_client.fetch_recent(hours=24 * 7, limit=200)
    if not articles:
        logger.warning("未获取到样本文章,无法生成分类体系")
        return False

    user_prompt = _build_sample_prompt(articles)
    try:
        result = ai_client.chat_completion_json(
            system_prompt=TAXONOMY_SYSTEM_PROMPT,
            user_prompt=user_prompt,
            temperature=0.5,
        )
    except Exception as exc:
        logger.error("生成分类体系失败: %s", exc)
        return False

    if not _collect_taxonomy_rows(result):
        logger.error("生成分类体系失败: %s", "LLM 返回内容中没有可用的规则")
        return False

    if force:
        # Not committed here: _save_taxonomy commits delete + insert together.
        db.query(Taxonomy).delete()
        logger.info("强制重新初始化 taxonomy")

    _save_taxonomy(db, result)
    logger.info("taxonomy 初始化完成,共写入 %d 条规则", db.query(Taxonomy).count())
    return True


def _save_taxonomy(db: Session, data: Dict[str, Any]) -> None:
    """Write the validated rows of an LLM taxonomy response and commit."""
    for row in _collect_taxonomy_rows(data):
        db.add(Taxonomy(created_by_ai=True, **row))
    db.commit()


def ensure_taxonomy(db: Session) -> bool:
    """Return ``True`` when rules exist, bootstrapping them first if the table is empty."""
    existing = db.query(Taxonomy).first()
    if existing:
        return True
    return bootstrap_taxonomy(db)


def list_taxonomy(db: Session, kind: str = None) -> List[Taxonomy]:
    """List rules ordered by kind and name, optionally restricted to one ``kind``."""
    query = db.query(Taxonomy)
    if kind:
        query = query.filter(Taxonomy.kind == kind)
    return query.order_by(Taxonomy.kind, Taxonomy.name).all()
# ---- config.py ----

class Settings(BaseSettings):
    """Application configuration: defaults below, overridable via environment / ``.env``."""

    # Values are read from ".env" (UTF-8); unknown variables are ignored.
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")

    # rssKeeper connection
    RSSKEEPER_BASE_URL: str = "http://localhost:7329"

    # LLM API (OpenAI-compatible)
    OPENAI_API_KEY: str = ""
    OPENAI_BASE_URL: str = "https://api.openai.com/v1"
    OPENAI_MODEL: str = "gpt-4o-mini"
    OPENAI_TIMEOUT: int = 60
    OPENAI_MAX_RETRIES: int = 3

    # dataClean data directory; DATABASE_URL is used as a filesystem path to
    # the SQLite file (see database_path), not as a URL.
    DATA_DIR: Path = Path("/app/data")
    DATABASE_URL: str = "/app/data/dataclean.db"

    # Output directory for generated briefs
    BRIEF_OUTPUT_DIR: Path = Path("/app/data/briefs")

    # Scheduling (intervals in minutes; brief time as hour / minute)
    SUMMARIZE_INTERVAL_MINUTES: int = 60
    TAG_SCORE_INTERVAL_MINUTES: int = 1440
    DAILY_BRIEF_HOUR: int = 8
    DAILY_BRIEF_MINUTE: int = 0

    # Deduplication thresholds
    TITLE_SIMILARITY_THRESHOLD: float = 0.85
    CONTENT_SIMILARITY_THRESHOLD: float = 0.80

    # Summary lengths
    MAX_AI_SUMMARY_LENGTH: int = 300
    MIN_ORIGINAL_SUMMARY_LENGTH: int = 100

    # Number of articles shown per category in each brief
    BRIEF_TOP_N_PER_CATEGORY: int = 10

    # Log level
    LOG_LEVEL: str = "INFO"

    # Web UI / API security
    # Empty disables API token auth (intranet use only); set a strong value in production.
    API_TOKEN: str = ""
    # Comma-separated CORS origins; use concrete domains in production,
    # e.g. "https://dataclean.example.com".
    CORS_ALLOWED_ORIGINS: str = ""

    @property
    def database_path(self) -> str:
        """Return the SQLite database file path as a string."""
        return str(Path(self.DATABASE_URL))

    @property
    def brief_output_dir_path(self) -> Path:
        """Return the brief output directory."""
        return self.BRIEF_OUTPUT_DIR


# Global settings instance, populated when this module is imported.
settings = Settings()


# ---- database.py ----

# check_same_thread=False is passed through to the SQLite driver's connect call.
engine = create_engine(
    f"sqlite:///{settings.database_path}",
    connect_args={"check_same_thread": False},
    echo=False,
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()


@event.listens_for(engine, "connect")
def _set_sqlite_pragma(dbapi_conn, connection_record):
    """Enable SQLite foreign-key enforcement on every new connection."""
    cursor = dbapi_conn.cursor()
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()


def get_db():
    """FastAPI dependency: yield a session and always close it afterwards."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


def init_db():
    """Ensure the database directory exists, then create all tables."""
    Path(settings.DATABASE_URL).parent.mkdir(parents=True, exist_ok=True)
    Base.metadata.create_all(bind=engine)
+ container_name: dataclean + ports: + - "7331:7331" + volumes: + - ./data:/app/data + env_file: + - .env + restart: unless-stopped + environment: + - TZ=Asia/Shanghai + networks: + - dataclean-net + +networks: + dataclean-net: + driver: bridge diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..49e3163 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,13 @@ + + + + + + + dataClean - RSS 数据清洗 + + +
+ + + diff --git a/frontend/package-lock.json b/frontend/package-lock.json new file mode 100644 index 0000000..97d2136 --- /dev/null +++ b/frontend/package-lock.json @@ -0,0 +1,1628 @@ +{ + "name": "dataclean-frontend", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "dataclean-frontend", + "version": "1.0.0", + "dependencies": { + "@element-plus/icons-vue": "^2.3.1", + "axios": "^1.6.8", + "element-plus": "^2.6.3", + "vue": "^3.4.21", + "vue-router": "^4.3.0" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^5.0.4", + "vite": "^5.2.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.29.7", + "resolved": "https://registry.npmmirror.com/@babel/helper-string-parser/-/helper-string-parser-7.29.7.tgz", + "integrity": "sha512-Pb5ijPrZ89GDH8223L4UP8i6QApWxs04RbPQJTeWDV0/keR2E36MeKnyr6LYmUUvqRRI+Iv87SuF1W6ErINzYw==", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-validator-identifier": { + "version": "7.29.7", + "resolved": "https://registry.npmmirror.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.29.7.tgz", + "integrity": "sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg==", + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/parser": { + "version": "7.29.7", + "resolved": "https://registry.npmmirror.com/@babel/parser/-/parser-7.29.7.tgz", + "integrity": "sha512-hnORnjP/1P/zFEndoeX+n+t1RwWRJiJpM/jO7FW32Kn9r5+sJB2JWOdYo4L6k78j15eCwY3Gm/7364B1EMwtNg==", + "dependencies": { + "@babel/types": "^7.29.7" + }, + "bin": { + "parser": "bin/babel-parser.js" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@babel/types": { + "version": "7.29.7", + "resolved": "https://registry.npmmirror.com/@babel/types/-/types-7.29.7.tgz", + "integrity": "sha512-4zBIxpPzowiZpusoFkyGVwakdRJUyuH5PxQ/PrqghfdFWWasvnCdPfQXHrenDai+gyLARulZjZowCOj6fjT4pA==", + "dependencies": { + 
"@babel/helper-string-parser": "^7.29.7", + "@babel/helper-validator-identifier": "^7.29.7" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@ctrl/tinycolor": { + "version": "4.2.0", + "resolved": "https://registry.npmmirror.com/@ctrl/tinycolor/-/tinycolor-4.2.0.tgz", + "integrity": "sha512-kzyuwOAQnXJNLS9PSyrk0CWk35nWJW/zl/6KvnTBMFK65gm7U1/Z5BqjxeapjZCIhQcM/DsrEmcbRwDyXyXK4A==", + "engines": { + "node": ">=14" + } + }, + "node_modules/@element-plus/icons-vue": { + "version": "2.3.2", + "resolved": "https://registry.npmmirror.com/@element-plus/icons-vue/-/icons-vue-2.3.2.tgz", + "integrity": "sha512-OzIuTaIfC8QXEPmJvB4Y4kw34rSXdCJzxcD1kFStBvr8bK6X1zQAYDo0CNMjojnfTqRQCJ0I7prlErcoRiET2A==", + "peerDependencies": { + "vue": "^3.2.0" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", + "integrity": "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/android-arm/-/android-arm-0.21.5.tgz", + "integrity": "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz", + "integrity": "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@esbuild/android-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/android-x64/-/android-x64-0.21.5.tgz", + "integrity": "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz", + "integrity": "sha512-DwqXqZyuk5AiWWf3UfLiRDJ5EDd49zg6O9wclZ7kUMv2WRFr4HKjXp/5t8JZ11QbQfUS6/cRCKGwYhtNAY88kQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz", + "integrity": "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz", + "integrity": "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz", + "integrity": "sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + 
}, + "node_modules/@esbuild/linux-arm": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz", + "integrity": "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz", + "integrity": "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz", + "integrity": "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz", + "integrity": "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz", + "integrity": "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + 
} + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz", + "integrity": "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz", + "integrity": "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz", + "integrity": "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz", + "integrity": "sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", + "integrity": "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@esbuild/openbsd-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", + "integrity": "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", + "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", + "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", + "integrity": "sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", + "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + 
"node_modules/@floating-ui/core": { + "version": "1.7.5", + "resolved": "https://registry.npmmirror.com/@floating-ui/core/-/core-1.7.5.tgz", + "integrity": "sha512-1Ih4WTWyw0+lKyFMcBHGbb5U5FtuHJuujoyyr5zTaWS5EYMeT6Jb2AuDeftsCsEuchO+mM2ij5+q9crhydzLhQ==", + "dependencies": { + "@floating-ui/utils": "^0.2.11" + } + }, + "node_modules/@floating-ui/dom": { + "version": "1.7.6", + "resolved": "https://registry.npmmirror.com/@floating-ui/dom/-/dom-1.7.6.tgz", + "integrity": "sha512-9gZSAI5XM36880PPMm//9dfiEngYoC6Am2izES1FF406YFsjvyBMmeJ2g4SAju3xWwtuynNRFL2s9hgxpLI5SQ==", + "dependencies": { + "@floating-ui/core": "^1.7.5", + "@floating-ui/utils": "^0.2.11" + } + }, + "node_modules/@floating-ui/utils": { + "version": "0.2.11", + "resolved": "https://registry.npmmirror.com/@floating-ui/utils/-/utils-0.2.11.tgz", + "integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==" + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmmirror.com/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==" + }, + "node_modules/@popperjs/core": { + "name": "@sxzz/popperjs-es", + "version": "2.11.8", + "resolved": "https://registry.npmmirror.com/@sxzz/popperjs-es/-/popperjs-es-2.11.8.tgz", + "integrity": "sha512-wOwESXvvED3S8xBmcPWHs2dUuzrE4XiZeFu7e1hROIJkm02a49N120pmOXxY33sBb6hArItm5W5tcg1cBtV+HQ==", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/popperjs" + } + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.61.1.tgz", + "integrity": "sha512-JnBB8MdXj45cajvTuO5FmPlvFVJRQgvrz1uSEl3NwqFnReAPGwb8EanbGi4z2nRaqLzjJSv5/JmycoTKlRZxHA==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + 
"android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.61.1.tgz", + "integrity": "sha512-Jx2g7iSjw4AOT0HDPHM9RV3GNjRXwybWtSFZiZAYUTjUwjVrYIwq3kBf+LnhqJlzXFAqTAh2F7IGI+O568exPw==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.61.1.tgz", + "integrity": "sha512-0F1L/Z3Eqv8mT2n3dCpeO8GcTvHvVqkP5/t6DMsn0KzhYVcg+s7Ncl5DS8qjKYEeio6Az0Gt6nyBORay5qIlCA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.61.1.tgz", + "integrity": "sha512-qLttcH871ujY4YcVfUSShhOw+CsoTatYz8gRbHO7Bb92QH059/P0y5do1KMs41fY0BpD2x4AJH/gID0zFiqVKQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.61.1.tgz", + "integrity": "sha512-fUI4RapGE0Oh3mb8mgfvC1O2nU1RpDZUKnDQm3xB1Ipg7C2wTs5Kstz7G2uWK99a8S2yTMq8/P4uycwNa0nJyw==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.61.1.tgz", + "integrity": "sha512-H5YrdvJaDtI/U9/emrD4b++xkvp3y/JvOe4rizHbxvkyMfRS/CiRYdji+Pl8D0brEaNFWUh1drQxgAGIl6Xudw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + 
"version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.61.1.tgz", + "integrity": "sha512-Q8CBCCQtDFrYtXoeUXSrnFXKOnyUhx6bz+SkL6A0E7V8kAiCJ5pamq1WtbfpVGhR5TSpXY6ak3avmDc5fHTyJA==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.61.1.tgz", + "integrity": "sha512-nwnhk1581l0FBVellGcVCAT0Oi06onEA3WB53sf01VO3I0UPBkMH9sXONYME2K0ovXcNayJfNtHfm6mpJElatQ==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.61.1.tgz", + "integrity": "sha512-x5Xr49hwt3hdW75UOZm3395YwwzPyauktslv29KpWL/T+vVAzoT3azLcTWv0eMciBNrx+DYjH4paehHoLpPvpg==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.61.1.tgz", + "integrity": "sha512-unMS3H73DpaoPyyEVPjGKleM/s0mkmsauTENpw4INQY8y4+IuLNjkueQ5QCtC0D3N38Y38yhAU8OoZ20S2Tm6w==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-gnu": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.61.1.tgz", + "integrity": "sha512-zNZzGRnAhwjFEYmvphJRV5XaQGjs62cCmeYYHUT//NbvEnHauw+I85nGG+SiVg5ld4GX8D1IbKIX+ozITQnhMQ==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loong64-musl": { + 
"version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.61.1.tgz", + "integrity": "sha512-LdpWGL8X209B2SIvWjqlc8VZgM6PKfontSerGepuldQmHYrAOtnMCXeJkxXGbC+PPZVOuu5czJo7fNV6aeW8rQ==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.61.1.tgz", + "integrity": "sha512-EC5kTtNaNGOmbMGqar8dvJy6y/hg99GAwjfBz++pxZhQATXGcRjd6c5en5wcbru0vkRmiMGsQKdMJOOf6sza4g==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-musl": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.61.1.tgz", + "integrity": "sha512-8hiwp6D4acEcNK78I4rP0/XtS1sknWIAMJBPdR4l6zUtyTm5KiTDr5bXmWt4foY7nAN7AThDHgkLIEZOWKbzWw==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.61.1.tgz", + "integrity": "sha512-10dh/h/BqA7DuMPWSxkR8uks18FRwnwOEqr5zOTEl+NOwP/OMzKX8OFR/Of9xxDA7D5qef1Nzar5WDD2kCCr1g==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.61.1.tgz", + "integrity": "sha512-YKJ5lg35DP17gcAOggnihe+APw9HLyj1Xn7gsmGumBJAUDa6NGXNixJzmkWLhcK9TOuuyQjdamzvJefkO7qHZQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + 
"version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.61.1.tgz", + "integrity": "sha512-Mlil5G2Jj6a7B3LWGctg+XPL9vdXYuzCtNXfxOQ0nPjc2m6ueUktocPGH9bnAM0bNRKb/bAWTujUU7IJQdQA+g==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.61.1.tgz", + "integrity": "sha512-bVWIOIk6pV01p4CdUbPP7CJ/434z+OooYjDuFcR+44N35YvKUC66G8MGnvcWx5mWKW3g61J+t74l3Kj15Kwn2Q==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.61.1.tgz", + "integrity": "sha512-qy5pBvZbqNFheBz61R1rzsezjm0J7O2oNGoWtGoY89SZYLUfxAJTBAqDChqAIdB4rCiIbi9nF7yZ83GnNiLwSw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-openbsd-x64": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.61.1.tgz", + "integrity": "sha512-E83TXjI4zm0+5f2qO+UOudaCYIhYwpJ5jq6YCZNIZ+6CbfhKrkAGezeiASBL9ElxAxFsRS9ZhESv8mfnj6TKeg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ] + }, + "node_modules/@rollup/rollup-openharmony-arm64": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.61.1.tgz", + "integrity": "sha512-fbWnKqVkjrJN38vNe3ahkbk6iejS/3b0Nt7EEtPpE6RBacZcGXNKbzfHN3GUUlXOPghUg0j6XUGrtjX9z1sIvA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "openharmony" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.61.1", + "resolved": 
"https://registry.npmmirror.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.61.1.tgz", + "integrity": "sha512-ArMl38iVAbk0New1ogihQNY6iphLi4ZaRsa037gUzv5yeKPY8TD3Dmy4x2RNC1VztU/uqm+G+/RwFrSka3Oy2g==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.61.1.tgz", + "integrity": "sha512-0mYtjHS9ucAbcATycCNK9IGBk/cCe/ma7EmSLGZdsxnOA8cjRIyU04wDpVAD9NiOfLUR9KTxdiO53uOkherqjQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-gnu": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.61.1.tgz", + "integrity": "sha512-gK1iCEPfpoSG9wfBihXxvBMi8ZfcWffYkEsC/Eih+iFENTaewvNcrEQ69lIOWYO5pePHKLHHO7nq5AILGO/HQQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.61.1.tgz", + "integrity": "sha512-X+zaP2x+j4RXGfbp/seSoRHWnPxzApilDszisZxbYH5C/jTxFhCtDNdPGZb9lJyYPs24wGxruPF7Y+sIXt9Gzw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@types/estree": { + "version": "1.0.9", + "resolved": "https://registry.npmmirror.com/@types/estree/-/estree-1.0.9.tgz", + "integrity": "sha512-GhdPgy1el4/ImP05X05Uw4cw2/M93BCUmnEvWZNStlCzEKME4Fkk+YpoA5OiHNQmoS7Cafb8Xa3Pya8m1Qrzeg==", + "dev": true + }, + "node_modules/@types/lodash": { + "version": "4.17.24", + "resolved": "https://registry.npmmirror.com/@types/lodash/-/lodash-4.17.24.tgz", + "integrity": "sha512-gIW7lQLZbue7lRSWEFql49QJJWThrTFFeIMJdp3eH4tKoxm1OvEPg02rm4wCCSHS0cL3/Fizimb35b7k8atwsQ==" + 
}, + "node_modules/@types/lodash-es": { + "version": "4.17.12", + "resolved": "https://registry.npmmirror.com/@types/lodash-es/-/lodash-es-4.17.12.tgz", + "integrity": "sha512-0NgftHUcV4v34VhXm8QBSftKVXtbkBG3ViCjs6+eJ5a6y6Mi/jiFGPc1sC7QK+9BFhWrURE3EOggmWaSxL9OzQ==", + "dependencies": { + "@types/lodash": "*" + } + }, + "node_modules/@types/web-bluetooth": { + "version": "0.0.21", + "resolved": "https://registry.npmmirror.com/@types/web-bluetooth/-/web-bluetooth-0.0.21.tgz", + "integrity": "sha512-oIQLCGWtcFZy2JW77j9k8nHzAOpqMHLQejDA48XXMWH6tjCQHz5RCFz1bzsmROyL6PUm+LLnUiI4BCn221inxA==" + }, + "node_modules/@vitejs/plugin-vue": { + "version": "5.2.4", + "resolved": "https://registry.npmmirror.com/@vitejs/plugin-vue/-/plugin-vue-5.2.4.tgz", + "integrity": "sha512-7Yx/SXSOcQq5HiiV3orevHUFn+pmMB4cgbEkDYgnkUWb0WfeQ/wa2yFv6D5ICiCQOVpjA7vYDXrC7AGO8yjDHA==", + "dev": true, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "peerDependencies": { + "vite": "^5.0.0 || ^6.0.0", + "vue": "^3.2.25" + } + }, + "node_modules/@vue/compiler-core": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/compiler-core/-/compiler-core-3.5.38.tgz", + "integrity": "sha512-s99aGxWYig9ErHbct27KXEGhrBYlRI6c4MwAgXErOAbX9xiW37/uMa+XUDO69zLz83dng8UUZ70CTOJrLrYrEQ==", + "dependencies": { + "@babel/parser": "^7.29.7", + "@vue/shared": "3.5.38", + "entities": "^7.0.1", + "estree-walker": "^2.0.2", + "source-map-js": "^1.2.1" + } + }, + "node_modules/@vue/compiler-dom": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/compiler-dom/-/compiler-dom-3.5.38.tgz", + "integrity": "sha512-JTqp25l8aFfJYF7/KmsXZjAxJz7T+SjmTJLoXVjHtc2BrSgSiW2n9Aem/cWq1OPe68A8JL06B3eVdhlP0H4TVw==", + "dependencies": { + "@vue/compiler-core": "3.5.38", + "@vue/shared": "3.5.38" + } + }, + "node_modules/@vue/compiler-sfc": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/compiler-sfc/-/compiler-sfc-3.5.38.tgz", + "integrity": 
"sha512-DuA2GiZawSEW442iw/9+Fkol8hTgb4Ke5KkhmSry65QA7YuyMbIdy8p0XZRMvNwJdgRz307W8g1CSzdvS4nuNg==", + "dependencies": { + "@babel/parser": "^7.29.7", + "@vue/compiler-core": "3.5.38", + "@vue/compiler-dom": "3.5.38", + "@vue/compiler-ssr": "3.5.38", + "@vue/shared": "3.5.38", + "estree-walker": "^2.0.2", + "magic-string": "^0.30.21", + "postcss": "^8.5.15", + "source-map-js": "^1.2.1" + } + }, + "node_modules/@vue/compiler-ssr": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/compiler-ssr/-/compiler-ssr-3.5.38.tgz", + "integrity": "sha512-7s+W5Gc42FGxZMcuwl8H5B29T8BJPMdBT7KHFE+BbAuZ/iTEdTtv7z2XiMjiaUUw4w3ZcCEdHs36RuYJ2VA7bA==", + "dependencies": { + "@vue/compiler-dom": "3.5.38", + "@vue/shared": "3.5.38" + } + }, + "node_modules/@vue/devtools-api": { + "version": "6.6.4", + "resolved": "https://registry.npmmirror.com/@vue/devtools-api/-/devtools-api-6.6.4.tgz", + "integrity": "sha512-sGhTPMuXqZ1rVOk32RylztWkfXTRhuS7vgAKv0zjqk8gbsHkJ7xfFf+jbySxt7tWObEJwyKaHMikV/WGDiQm8g==" + }, + "node_modules/@vue/reactivity": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/reactivity/-/reactivity-3.5.38.tgz", + "integrity": "sha512-pG6LV/NDNRbKizcUjFFLAfjaL8mcv4DmR9avNcUw2gDHBzZneuS2TWCmp633ynzxz9YYKNeEPK2I8Wraqy2HUQ==", + "dependencies": { + "@vue/shared": "3.5.38" + } + }, + "node_modules/@vue/runtime-core": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/runtime-core/-/runtime-core-3.5.38.tgz", + "integrity": "sha512-iyW8WVfF1CpCXxncZY5Ei6rSd6oZr5DgEom//fUjRBRl56AXPD+s9ATvukRt77ZFTuYlnVA1bxY+dJB94tWVYw==", + "dependencies": { + "@vue/reactivity": "3.5.38", + "@vue/shared": "3.5.38" + } + }, + "node_modules/@vue/runtime-dom": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/runtime-dom/-/runtime-dom-3.5.38.tgz", + "integrity": "sha512-apX2wt9sdfDshS+a2xueFZLVpt0GkRJZSoPmrW/SA4yzXTznhfcMVW59gr7h4YQeY0vJhdJkk2rsIDwgfFgC5A==", + "dependencies": { + "@vue/reactivity": 
"3.5.38", + "@vue/runtime-core": "3.5.38", + "@vue/shared": "3.5.38", + "csstype": "^3.2.3" + } + }, + "node_modules/@vue/server-renderer": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/server-renderer/-/server-renderer-3.5.38.tgz", + "integrity": "sha512-vue8vbf2QlV4quHqzwmJy6dWfmRhP1J8l4wtZg60CL6VoKqcPY2oe7may3+1d9qfpedjK5PRLFqd5k3Isj9mUw==", + "dependencies": { + "@vue/compiler-ssr": "3.5.38", + "@vue/shared": "3.5.38" + }, + "peerDependencies": { + "vue": "3.5.38" + } + }, + "node_modules/@vue/shared": { + "version": "3.5.38", + "resolved": "https://registry.npmmirror.com/@vue/shared/-/shared-3.5.38.tgz", + "integrity": "sha512-FTW0AFZNaK5/mOqvGBwVfUlNLU38TiQn4+DQgIFUnrBBJQ1crMJ82yeGQLV5jyKFsO8yRukpbuP7x+nRbH6aug==" + }, + "node_modules/@vueuse/core": { + "version": "14.3.0", + "resolved": "https://registry.npmmirror.com/@vueuse/core/-/core-14.3.0.tgz", + "integrity": "sha512-aHfz47g0ZhMtTVHmIzMVpJy8ePhhOy68GY5bv110+5DVtZ+W7BsOx+m61UNQqfrWyPztIHIanWa3E2tib3NFIw==", + "dependencies": { + "@types/web-bluetooth": "^0.0.21", + "@vueuse/metadata": "14.3.0", + "@vueuse/shared": "14.3.0" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "vue": "^3.5.0" + } + }, + "node_modules/@vueuse/metadata": { + "version": "14.3.0", + "resolved": "https://registry.npmmirror.com/@vueuse/metadata/-/metadata-14.3.0.tgz", + "integrity": "sha512-BwxmbAzwAVF50+MW57GXOUEV61nFBGnlBvrTqj49PqWJu3uw7hdu72ztXeZ33RdZtDY6kO+bfCAE1PCn88Tktw==", + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@vueuse/shared": { + "version": "14.3.0", + "resolved": "https://registry.npmmirror.com/@vueuse/shared/-/shared-14.3.0.tgz", + "integrity": "sha512-bZpge9eSXwa4ToSiqJ7j6KRwhAsneMFoSz3LMWKQDkqimm3D/tbFlrklrs/IOqC8tEcYmXQZJ6N0UrjhBirVCg==", + "funding": { + "url": "https://github.com/sponsors/antfu" + }, + "peerDependencies": { + "vue": "^3.5.0" + } + }, + "node_modules/agent-base": { + "version": 
"6.0.2", + "resolved": "https://registry.npmmirror.com/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/async-validator": { + "version": "4.2.5", + "resolved": "https://registry.npmmirror.com/async-validator/-/async-validator-4.2.5.tgz", + "integrity": "sha512-7HhHjtERjqlNbZtqNqy2rckN/SpOOlmDliet+lP7k+eKZEjPk3DgyeU9lIXLdeLz0uBbbVp+9Qdow9wJWgwwfg==" + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmmirror.com/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/axios": { + "version": "1.17.0", + "resolved": "https://registry.npmmirror.com/axios/-/axios-1.17.0.tgz", + "integrity": "sha512-J8SwNxprqqpbfenehxWYXE7CW+wM1BB4w3+N+g+/Wx40xM4rsLrfPmHHxSWIxJLYDgSY/HqlFPIYb2/S3rxafw==", + "dependencies": { + "follow-redirects": "^1.16.0", + "form-data": "^4.0.5", + "https-proxy-agent": "^5.0.1", + "proxy-from-env": "^2.1.0" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmmirror.com/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmmirror.com/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/csstype": { + 
"version": "3.2.3", + "resolved": "https://registry.npmmirror.com/csstype/-/csstype-3.2.3.tgz", + "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==" + }, + "node_modules/dayjs": { + "version": "1.11.21", + "resolved": "https://registry.npmmirror.com/dayjs/-/dayjs-1.11.21.tgz", + "integrity": "sha512-98IT+HOahAisibz/yjKbzuOBwYcjJ7BCLPzARyHiyEBmRz4fatF+KPJszEHXsGYjUG234aH/cOjW1wwTbKUZlA==" + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmmirror.com/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmmirror.com/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/element-plus": { + "version": "2.14.2", + "resolved": "https://registry.npmmirror.com/element-plus/-/element-plus-2.14.2.tgz", + "integrity": "sha512-eNH9uP3wQoNqieEIHXiNvIVv+zO5sZDU0CAZq5b0zqSN06DD0/V9xIq1R/qm3rw5k3nBTM1JvpxhCfRbaFLzDQ==", + "dependencies": { + "@ctrl/tinycolor": "^4.2.0", + "@element-plus/icons-vue": "^2.3.2", + "@floating-ui/dom": "^1.7.6", + "@popperjs/core": "npm:@sxzz/popperjs-es@^2.11.8", + 
"@types/lodash": "^4.17.24", + "@types/lodash-es": "^4.17.12", + "@vueuse/core": "14.3.0", + "async-validator": "^4.2.5", + "dayjs": "^1.11.20", + "lodash": "^4.18.1", + "lodash-es": "^4.18.1", + "lodash-unified": "^1.0.3", + "memoize-one": "^6.0.0", + "normalize-wheel-es": "^1.2.0", + "vue-component-type-helpers": "^3.3.3" + }, + "peerDependencies": { + "vue": "^3.3.7" + } + }, + "node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmmirror.com/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmmirror.com/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.2", + "resolved": "https://registry.npmmirror.com/es-object-atoms/-/es-object-atoms-1.1.2.tgz", + "integrity": "sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmmirror.com/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "dependencies": { + 
"es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/esbuild": { + "version": "0.21.5", + "resolved": "https://registry.npmmirror.com/esbuild/-/esbuild-0.21.5.tgz", + "integrity": "sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.21.5", + "@esbuild/android-arm": "0.21.5", + "@esbuild/android-arm64": "0.21.5", + "@esbuild/android-x64": "0.21.5", + "@esbuild/darwin-arm64": "0.21.5", + "@esbuild/darwin-x64": "0.21.5", + "@esbuild/freebsd-arm64": "0.21.5", + "@esbuild/freebsd-x64": "0.21.5", + "@esbuild/linux-arm": "0.21.5", + "@esbuild/linux-arm64": "0.21.5", + "@esbuild/linux-ia32": "0.21.5", + "@esbuild/linux-loong64": "0.21.5", + "@esbuild/linux-mips64el": "0.21.5", + "@esbuild/linux-ppc64": "0.21.5", + "@esbuild/linux-riscv64": "0.21.5", + "@esbuild/linux-s390x": "0.21.5", + "@esbuild/linux-x64": "0.21.5", + "@esbuild/netbsd-x64": "0.21.5", + "@esbuild/openbsd-x64": "0.21.5", + "@esbuild/sunos-x64": "0.21.5", + "@esbuild/win32-arm64": "0.21.5", + "@esbuild/win32-ia32": "0.21.5", + "@esbuild/win32-x64": "0.21.5" + } + }, + "node_modules/estree-walker": { + "version": "2.0.2", + "resolved": "https://registry.npmmirror.com/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==" + }, + "node_modules/follow-redirects": { + "version": "1.16.0", + "resolved": "https://registry.npmmirror.com/follow-redirects/-/follow-redirects-1.16.0.tgz", + "integrity": "sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==", + "funding": [ + { + "type": "individual", + "url": 
"https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.5", + "resolved": "https://registry.npmmirror.com/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmmirror.com/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmmirror.com/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmmirror.com/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": 
"https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmmirror.com/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmmirror.com/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmmirror.com/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.4", + "resolved": "https://registry.npmmirror.com/hasown/-/hasown-2.0.4.tgz", + "integrity": "sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A==", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": 
"https://registry.npmmirror.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/lodash": { + "version": "4.18.1", + "resolved": "https://registry.npmmirror.com/lodash/-/lodash-4.18.1.tgz", + "integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==" + }, + "node_modules/lodash-es": { + "version": "4.18.1", + "resolved": "https://registry.npmmirror.com/lodash-es/-/lodash-es-4.18.1.tgz", + "integrity": "sha512-J8xewKD/Gk22OZbhpOVSwcs60zhd95ESDwezOFuA3/099925PdHJ7OFHNTGtajL3AlZkykD32HykiMo+BIBI8A==" + }, + "node_modules/lodash-unified": { + "version": "1.0.3", + "resolved": "https://registry.npmmirror.com/lodash-unified/-/lodash-unified-1.0.3.tgz", + "integrity": "sha512-WK9qSozxXOD7ZJQlpSqOT+om2ZfcT4yO+03FuzAHD0wF6S0l0090LRPDx3vhTTLZ8cFKpBn+IOcVXK6qOcIlfQ==", + "peerDependencies": { + "@types/lodash-es": "*", + "lodash": "*", + "lodash-es": "*" + } + }, + "node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmmirror.com/magic-string/-/magic-string-0.30.21.tgz", + "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmmirror.com/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/memoize-one": { + "version": "6.0.0", + "resolved": "https://registry.npmmirror.com/memoize-one/-/memoize-one-6.0.0.tgz", + "integrity": 
"sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw==" + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmmirror.com/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmmirror.com/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, + "node_modules/nanoid": { + "version": "3.3.12", + "resolved": "https://registry.npmmirror.com/nanoid/-/nanoid-3.3.12.tgz", + "integrity": "sha512-ZB9RH/39qpq5Vu6Y+NmUaFhQR6pp+M2Xt76XBnEwDaGcVAqhlvxrl3B2bKS5D3NH3QR76v3aSrKaF/Kiy7lEtQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/normalize-wheel-es": { + "version": "1.2.0", + "resolved": "https://registry.npmmirror.com/normalize-wheel-es/-/normalize-wheel-es-1.2.0.tgz", + "integrity": "sha512-Wj7+EJQ8mSuXr2iWfnujrimU35R2W4FAErEyTmJoJ7ucwTn2hOUSsRehMb5RSYkxXGTM7Y9QpvPmp++w5ftoJw==" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmmirror.com/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==" + }, + "node_modules/postcss": { + "version": "8.5.15", + 
"resolved": "https://registry.npmmirror.com/postcss/-/postcss-8.5.15.tgz", + "integrity": "sha512-FfR8sjd4em2T6fb3I2MwAJU7HWVMr9zba+enmQeeWFfCbm+UOC/0X4DS8XtpUTMwWMGbjKYP7xjfNekzyGmB3A==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/postcss/" + }, + { + "type": "tidelift", + "url": "https://tidelift.com/funding/github/npm/postcss" + }, + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "dependencies": { + "nanoid": "^3.3.12", + "picocolors": "^1.1.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/proxy-from-env": { + "version": "2.1.0", + "resolved": "https://registry.npmmirror.com/proxy-from-env/-/proxy-from-env-2.1.0.tgz", + "integrity": "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA==", + "engines": { + "node": ">=10" + } + }, + "node_modules/rollup": { + "version": "4.61.1", + "resolved": "https://registry.npmmirror.com/rollup/-/rollup-4.61.1.tgz", + "integrity": "sha512-I4KW6iuRpuu2uHBLraZ1wNZe0DP7lnRha+VJ9tNaYVaVgKhW0aI3h4RYnoRPeql0flHm/Co55b7snEDcOfOJrA==", + "dev": true, + "dependencies": { + "@types/estree": "1.0.9" + }, + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=18.0.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "@rollup/rollup-android-arm-eabi": "4.61.1", + "@rollup/rollup-android-arm64": "4.61.1", + "@rollup/rollup-darwin-arm64": "4.61.1", + "@rollup/rollup-darwin-x64": "4.61.1", + "@rollup/rollup-freebsd-arm64": "4.61.1", + "@rollup/rollup-freebsd-x64": "4.61.1", + "@rollup/rollup-linux-arm-gnueabihf": "4.61.1", + "@rollup/rollup-linux-arm-musleabihf": "4.61.1", + "@rollup/rollup-linux-arm64-gnu": "4.61.1", + "@rollup/rollup-linux-arm64-musl": "4.61.1", + "@rollup/rollup-linux-loong64-gnu": "4.61.1", + "@rollup/rollup-linux-loong64-musl": "4.61.1", + "@rollup/rollup-linux-ppc64-gnu": "4.61.1", + "@rollup/rollup-linux-ppc64-musl": "4.61.1", + 
"@rollup/rollup-linux-riscv64-gnu": "4.61.1", + "@rollup/rollup-linux-riscv64-musl": "4.61.1", + "@rollup/rollup-linux-s390x-gnu": "4.61.1", + "@rollup/rollup-linux-x64-gnu": "4.61.1", + "@rollup/rollup-linux-x64-musl": "4.61.1", + "@rollup/rollup-openbsd-x64": "4.61.1", + "@rollup/rollup-openharmony-arm64": "4.61.1", + "@rollup/rollup-win32-arm64-msvc": "4.61.1", + "@rollup/rollup-win32-ia32-msvc": "4.61.1", + "@rollup/rollup-win32-x64-gnu": "4.61.1", + "@rollup/rollup-win32-x64-msvc": "4.61.1", + "fsevents": "~2.3.2" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmmirror.com/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/vite": { + "version": "5.4.21", + "resolved": "https://registry.npmmirror.com/vite/-/vite-5.4.21.tgz", + "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", + "dev": true, + "dependencies": { + "esbuild": "^0.21.3", + "postcss": "^8.4.43", + "rollup": "^4.20.0" + }, + "bin": { + "vite": "bin/vite.js" + }, + "engines": { + "node": "^18.0.0 || >=20.0.0" + }, + "funding": { + "url": "https://github.com/vitejs/vite?sponsor=1" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + }, + "peerDependencies": { + "@types/node": "^18.0.0 || >=20.0.0", + "less": "*", + "lightningcss": "^1.21.0", + "sass": "*", + "sass-embedded": "*", + "stylus": "*", + "sugarss": "*", + "terser": "^5.4.0" + }, + "peerDependenciesMeta": { + "@types/node": { + "optional": true + }, + "less": { + "optional": true + }, + "lightningcss": { + "optional": true + }, + "sass": { + "optional": true + }, + "sass-embedded": { + "optional": true + }, + "stylus": { + "optional": true + }, + "sugarss": { + "optional": true + }, + "terser": { + "optional": true + } + } + }, + "node_modules/vue": { + "version": 
"3.5.38", + "resolved": "https://registry.npmmirror.com/vue/-/vue-3.5.38.tgz", + "integrity": "sha512-vAMKHfImQlYSy0C+PBue4s3ERZ2xGKfgZg5GXAsLInq1dyh2H78ILVP5sK0KPFPVW4kv+OGCIvBEondcjpZp7A==", + "dependencies": { + "@vue/compiler-dom": "3.5.38", + "@vue/compiler-sfc": "3.5.38", + "@vue/runtime-dom": "3.5.38", + "@vue/server-renderer": "3.5.38", + "@vue/shared": "3.5.38" + }, + "peerDependencies": { + "typescript": "*" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/vue-component-type-helpers": { + "version": "3.3.4", + "resolved": "https://registry.npmmirror.com/vue-component-type-helpers/-/vue-component-type-helpers-3.3.4.tgz", + "integrity": "sha512-joip1uZTaQR0nD23N400gIdJ7xY+WiiiMA/BCKz842gvGBknqDQAzklUvDEhqFvvrhQY8S2ZANBMu4X70VMFGw==" + }, + "node_modules/vue-router": { + "version": "4.6.4", + "resolved": "https://registry.npmmirror.com/vue-router/-/vue-router-4.6.4.tgz", + "integrity": "sha512-Hz9q5sa33Yhduglwz6g9skT8OBPii+4bFn88w6J+J4MfEo4KRRpmiNG/hHHkdbRFlLBOqxN8y8gf2Fb0MTUgVg==", + "dependencies": { + "@vue/devtools-api": "^6.6.4" + }, + "funding": { + "url": "https://github.com/sponsors/posva" + }, + "peerDependencies": { + "vue": "^3.5.0" + } + } + } +} diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..502b563 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,22 @@ +{ + "name": "dataclean-frontend", + "private": true, + "version": "1.0.0", + "type": "module", + "scripts": { + "dev": "vite --host 0.0.0.0", + "build": "vite build", + "preview": "vite preview" + }, + "dependencies": { + "vue": "^3.4.21", + "vue-router": "^4.3.0", + "element-plus": "^2.6.3", + "@element-plus/icons-vue": "^2.3.1", + "axios": "^1.6.8" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^5.0.4", + "vite": "^5.2.0" + } +} diff --git a/frontend/src/App.vue b/frontend/src/App.vue new file mode 100644 index 0000000..32ca499 --- /dev/null +++ b/frontend/src/App.vue @@ -0,0 +1,125 @@ + 
+ + + + diff --git a/frontend/src/api/index.js b/frontend/src/api/index.js new file mode 100644 index 0000000..3850caf --- /dev/null +++ b/frontend/src/api/index.js @@ -0,0 +1,74 @@ +import axios from 'axios' + +const API_TOKEN_KEY = 'dataclean_api_token' + +const api = axios.create({ + baseURL: '/api', + timeout: 30000, +}) + +export function getApiToken() { + return localStorage.getItem(API_TOKEN_KEY) || '' +} + +export function setApiToken(token) { + if (token) { + localStorage.setItem(API_TOKEN_KEY, token) + } else { + localStorage.removeItem(API_TOKEN_KEY) + } +} + +api.interceptors.request.use((config) => { + const token = getApiToken() + if (token) { + config.headers.Authorization = `Bearer ${token}` + } + return config +}) + +api.interceptors.response.use( + (response) => response.data, + (error) => { + const status = error.response?.status + const detail = error.response?.data?.detail || error.message || '请求失败' + if (status === 401 || status === 403) { + return Promise.reject(new Error(`${detail},请检查 API Token 是否设置正确`)) + } + return Promise.reject(new Error(detail)) + } +) + +export default api + +export const datacleanApi = { + // 健康检查 + health: () => axios.get('/health').then((r) => r.data), + + // 仪表盘 + getStats: () => api.get('/stats'), + + // 文章 + getArticles: (params) => api.get('/articles', { params }), + getArticle: (id) => api.get(`/articles/${id}`), + + // 简报 + getBriefs: (params) => api.get('/briefs', { params }), + getBrief: (date) => api.get(`/briefs/${date}`), + regenerateBrief: (date) => api.post(`/briefs/${date}/regenerate`), + + // 分类体系 + getTaxonomy: (kind) => api.get('/taxonomy', { params: kind ? 
{ kind } : {} }), + bootstrapTaxonomy: (force = false) => api.post(`/taxonomy/bootstrap?force=${force}`), + + // 任务 + summarize: () => api.post('/tasks/summarize'), + tagScoreDedup: () => api.post('/tasks/tag-score-dedup'), + generateBrief: () => api.post('/tasks/brief'), + + // 配置 + getSettings: () => api.get('/settings'), + updateSetting: (key, value) => api.put(`/settings/${key}`, { value }), + updateSettingsBatch: (settings) => api.put('/settings', { settings }), + resetSettings: () => api.post('/settings/reset'), +} diff --git a/frontend/src/main.js b/frontend/src/main.js new file mode 100644 index 0000000..932f782 --- /dev/null +++ b/frontend/src/main.js @@ -0,0 +1,20 @@ +import { createApp } from 'vue' +import ElementPlus from 'element-plus' +import * as ElementPlusIconsVue from '@element-plus/icons-vue' +import 'element-plus/dist/index.css' +import 'element-plus/theme-chalk/dark/css-vars.css' + +import App from './App.vue' +import router from './router' +import './style.css' + +const app = createApp(App) + +app.use(ElementPlus) +app.use(router) + +for (const [key, component] of Object.entries(ElementPlusIconsVue)) { + app.component(key, component) +} + +app.mount('#app') diff --git a/frontend/src/router/index.js b/frontend/src/router/index.js new file mode 100644 index 0000000..c4b1dd1 --- /dev/null +++ b/frontend/src/router/index.js @@ -0,0 +1,28 @@ +import { createRouter, createWebHistory } from 'vue-router' +import Dashboard from '@/views/Dashboard.vue' +import Articles from '@/views/Articles.vue' +import ArticleDetail from '@/views/ArticleDetail.vue' +import Briefs from '@/views/Briefs.vue' +import BriefDetail from '@/views/BriefDetail.vue' +import Taxonomy from '@/views/Taxonomy.vue' +import Tasks from '@/views/Tasks.vue' +import Settings from '@/views/Settings.vue' + +const routes = [ + { path: '/', redirect: '/dashboard' }, + { path: '/dashboard', name: 'Dashboard', component: Dashboard }, + { path: '/articles', name: 'Articles', component: Articles 
}, + { path: '/articles/:id', name: 'ArticleDetail', component: ArticleDetail, props: true }, + { path: '/briefs', name: 'Briefs', component: Briefs }, + { path: '/briefs/:date', name: 'BriefDetail', component: BriefDetail, props: true }, + { path: '/taxonomy', name: 'Taxonomy', component: Taxonomy }, + { path: '/tasks', name: 'Tasks', component: Tasks }, + { path: '/settings', name: 'Settings', component: Settings }, +] + +const router = createRouter({ + history: createWebHistory(), + routes, +}) + +export default router diff --git a/frontend/src/style.css b/frontend/src/style.css new file mode 100644 index 0000000..e51a526 --- /dev/null +++ b/frontend/src/style.css @@ -0,0 +1,164 @@ +:root { + --dc-bg: #0f0f23; + --dc-card-bg: #1a1a2e; + --dc-border: #2d2d44; + --dc-text: #e0e0e0; + --dc-text-secondary: #a0a0a0; + --dc-primary: #409eff; + --dc-success: #67c23a; + --dc-warning: #e6a23c; + --dc-danger: #f56c6c; +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + background-color: var(--dc-bg); + color: var(--dc-text); +} + +.page-title { + font-size: 24px; + font-weight: 600; + margin-bottom: 20px; + color: var(--dc-text); +} + +.stat-card { + background: var(--dc-card-bg); + border: 1px solid var(--dc-border); + border-radius: 8px; + padding: 20px; + transition: transform 0.2s; +} + +.stat-card:hover { + transform: translateY(-2px); +} + +.stat-value { + font-size: 28px; + font-weight: 700; + color: var(--dc-primary); +} + +.stat-label { + font-size: 14px; + color: var(--dc-text-secondary); + margin-top: 8px; +} + +.dark-card { + background: var(--dc-card-bg) !important; + border: 1px solid var(--dc-border) !important; + color: var(--dc-text) !important; +} + +.dark-card .el-card__header { + border-bottom: 1px solid var(--dc-border) !important; + color: var(--dc-text) !important; +} + +.daily-bar-wrap { + display: flex; + 
align-items: flex-end; + gap: 8px; + height: 120px; + padding: 10px 0; +} + +.daily-bar { + flex: 1; + background: linear-gradient(to top, var(--dc-primary), #66b1ff); + border-radius: 4px 4px 0 0; + min-width: 20px; + position: relative; + transition: opacity 0.2s; +} + +.daily-bar:hover { + opacity: 0.8; +} + +.daily-bar-label { + position: absolute; + bottom: -20px; + left: 50%; + transform: translateX(-50%); + font-size: 12px; + color: var(--dc-text-secondary); + white-space: nowrap; +} + +.daily-bar-value { + position: absolute; + top: -20px; + left: 50%; + transform: translateX(-50%); + font-size: 12px; + color: var(--dc-text); +} + +.score-progress { + margin-top: 8px; +} + +.score-progress .el-progress-bar__outer { + background-color: rgba(255, 255, 255, 0.1) !important; +} + +.article-link { + color: var(--dc-primary); + text-decoration: none; +} + +.article-link:hover { + text-decoration: underline; +} + +.tag-item { + margin-right: 6px; + margin-bottom: 4px; +} + +/* Element Plus 暗色覆盖 */ +.el-menu { + border-right: none !important; + background-color: transparent !important; +} + +.el-aside { + background-color: var(--dc-card-bg) !important; + border-right: 1px solid var(--dc-border) !important; +} + +.el-container { + background-color: var(--dc-bg) !important; +} + +.el-main { + background-color: var(--dc-bg) !important; +} + +.el-table { + background-color: transparent !important; +} + +.el-table th, +.el-table tr { + background-color: transparent !important; +} + +.el-table--enable-row-hover .el-table__body tr:hover > td { + background-color: rgba(64, 158, 255, 0.1) !important; +} + +.el-input__wrapper, +.el-textarea__inner { + background-color: rgba(255, 255, 255, 0.05) !important; +} diff --git a/frontend/src/views/ArticleDetail.vue b/frontend/src/views/ArticleDetail.vue new file mode 100644 index 0000000..c6462cc --- /dev/null +++ b/frontend/src/views/ArticleDetail.vue @@ -0,0 +1,163 @@ + + + + + diff --git a/frontend/src/views/Articles.vue 
b/frontend/src/views/Articles.vue new file mode 100644 index 0000000..55ac557 --- /dev/null +++ b/frontend/src/views/Articles.vue @@ -0,0 +1,117 @@ + + + diff --git a/frontend/src/views/BriefDetail.vue b/frontend/src/views/BriefDetail.vue new file mode 100644 index 0000000..ac9519e --- /dev/null +++ b/frontend/src/views/BriefDetail.vue @@ -0,0 +1,121 @@ + + + + + diff --git a/frontend/src/views/Briefs.vue b/frontend/src/views/Briefs.vue new file mode 100644 index 0000000..a4ca183 --- /dev/null +++ b/frontend/src/views/Briefs.vue @@ -0,0 +1,56 @@ + + + diff --git a/frontend/src/views/Dashboard.vue b/frontend/src/views/Dashboard.vue new file mode 100644 index 0000000..265d7ee --- /dev/null +++ b/frontend/src/views/Dashboard.vue @@ -0,0 +1,152 @@ + + + diff --git a/frontend/src/views/Settings.vue b/frontend/src/views/Settings.vue new file mode 100644 index 0000000..69d22d0 --- /dev/null +++ b/frontend/src/views/Settings.vue @@ -0,0 +1,103 @@ + + + diff --git a/frontend/src/views/Tasks.vue b/frontend/src/views/Tasks.vue new file mode 100644 index 0000000..2359e8a --- /dev/null +++ b/frontend/src/views/Tasks.vue @@ -0,0 +1,116 @@ + + + + + diff --git a/frontend/src/views/Taxonomy.vue b/frontend/src/views/Taxonomy.vue new file mode 100644 index 0000000..e7485ac --- /dev/null +++ b/frontend/src/views/Taxonomy.vue @@ -0,0 +1,110 @@ + + + + + diff --git a/frontend/src/views/TaxonomyTable.vue b/frontend/src/views/TaxonomyTable.vue new file mode 100644 index 0000000..2613dbc --- /dev/null +++ b/frontend/src/views/TaxonomyTable.vue @@ -0,0 +1,30 @@ + + + diff --git a/frontend/vite.config.js b/frontend/vite.config.js new file mode 100644 index 0000000..8b9156b --- /dev/null +++ b/frontend/vite.config.js @@ -0,0 +1,30 @@ +import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' +import { resolve } from 'path' + +// https://vitejs.dev/config/ +export default defineConfig({ + plugins: [vue()], + resolve: { + alias: { + '@': resolve(__dirname, 'src'), + }, + }, + 
server: { + port: 7332, + proxy: { + '/api': { + target: 'http://localhost:7331', + changeOrigin: true, + }, + '/health': { + target: 'http://localhost:7331', + changeOrigin: true, + }, + }, + }, + build: { + outDir: 'dist', + assetsDir: 'assets', + }, +}) diff --git a/main.py b/main.py new file mode 100644 index 0000000..407ce9e --- /dev/null +++ b/main.py @@ -0,0 +1,426 @@ +"""dataClean FastAPI 入口""" +import logging +import os +from contextlib import asynccontextmanager +from datetime import datetime, timedelta, timezone +from typing import Optional, List + +from fastapi import FastAPI, Depends, HTTPException, Query, Body, Security, status +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from pydantic import BaseModel, ConfigDict +from sqlalchemy.orm import Session + +from config import settings +from database import init_db, get_db, SessionLocal +from scheduler import init_scheduler, stop_scheduler, get_scheduler, get_task_lock +from app.taxonomy import bootstrap_taxonomy, list_taxonomy, ensure_taxonomy +from app.summarizer import fetch_and_summarize +from app.tagger import tag_articles +from app.deduplicator import deduplicate_articles +from app.scorer import score_articles +from app.brief import generate_daily_brief +from app.settings_manager import ( + init_default_settings, + list_settings, + get_setting, + set_setting, + reset_settings, + apply_db_settings_to_config, +) +from models import EnrichedArticle, DailyBrief, Taxonomy, DuplicateGroup, AppSetting + +logging.basicConfig( + level=getattr(logging, settings.LOG_LEVEL.upper(), logging.INFO), + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + +# API Token 鉴权(当配置时启用) +security_scheme = HTTPBearer(auto_error=False) + + +def _get_allowed_origins() -> List[str]: + """解析 CORS 允许来源配置""" + raw = settings.CORS_ALLOWED_ORIGINS + if raw: + 
return [o.strip() for o in raw.split(",") if o.strip()] + # 默认只允许同源(Docker/生产由反向代理或浏览器同源访问) + return [] + + +def verify_token(credentials: Optional[HTTPAuthorizationCredentials] = Security(security_scheme)): + """验证 API Token;未配置时跳过鉴权""" + token = settings.API_TOKEN + if not token: + return None + if not credentials: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="缺少 Authorization 请求头", + headers={"WWW-Authenticate": "Bearer"}, + ) + if credentials.scheme != "Bearer" or credentials.credentials != token: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="无效的 API Token", + ) + return credentials.credentials + + +def _run_task_locked(func, db: Session): + """带互斥锁执行任务""" + acquired = get_task_lock().acquire(blocking=False) + if not acquired: + raise HTTPException(status_code=409, detail="已有任务正在执行,请稍后再试") + try: + return func(db) + finally: + get_task_lock().release() + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """应用生命周期管理""" + logger.info("启动 dataClean 服务") + init_db() + + db = SessionLocal() + try: + # 初始化默认配置 + init_default_settings(db) + # 用数据库配置覆盖全局 settings + apply_db_settings_to_config(db) + # 首次启动时确保 taxonomy 表存在 + ensure_taxonomy(db) + except Exception as exc: + logger.error("启动初始化失败: %s", exc) + finally: + db.close() + + init_scheduler() + yield + stop_scheduler() + + +app = FastAPI( + title="dataClean", + description="RSS 数据清洗、摘要、分类、打分与简报生成服务", + version="1.0.0", + lifespan=lifespan, +) + +# CORS 配置:生产环境收敛到具体域名,且不与 credentials=true 同时用通配符 +_allowed_origins = _get_allowed_origins() +app.add_middleware( + CORSMiddleware, + allow_origins=_allowed_origins or ["*"], + allow_credentials=bool(_allowed_origins), + allow_methods=["*"], + allow_headers=["*"], +) + + +# ---------- Pydantic 模型 ---------- + +class ArticleOut(BaseModel): + id: int + rk_article_id: int + title: str + link: str + feed_title: str + category: str + tags: List[str] + heat_score: float + importance_score: float + 
duplication_score: float + composite_score: float + ai_summary: str + is_representative: bool + published_at: Optional[str] + + model_config = ConfigDict(from_attributes=True) + + +class ArticleListOut(BaseModel): + items: List[ArticleOut] + total: int + + +class BriefOut(BaseModel): + id: int + brief_date: str + total_articles: int + unique_articles: int + by_category: dict + markdown_path: str + + model_config = ConfigDict(from_attributes=True) + + +class TaxonomyOut(BaseModel): + id: int + name: str + kind: str + description: str + keywords: List[str] + weight: float + created_by_ai: bool + + model_config = ConfigDict(from_attributes=True) + + +class SettingOut(BaseModel): + key: str + value: str + description: str + is_sensitive: bool + is_masked: bool + updated_at: Optional[str] + + +class SettingUpdate(BaseModel): + value: str + + +class BatchSettingsUpdate(BaseModel): + settings: dict + + +class StatsOut(BaseModel): + total_articles: int + today_articles: int + ai_summarized: int + categories: int + tags: int + duplicate_groups: int + briefs: int + next_jobs: dict + + +# ---------- 健康检查 ---------- + +@app.get("/health") +def health(): + return {"status": "ok", "service": "dataClean"} + + +# ---------- 文章接口 ---------- + +@app.get("/api/articles", response_model=ArticleListOut) +def list_articles( + date: Optional[str] = Query(None, description="日期 YYYY-MM-DD"), + category: Optional[str] = Query(None), + tag: Optional[str] = Query(None), + representative_only: bool = Query(False, description="仅返回重复组代表文章"), + limit: int = Query(50, ge=1, le=200), + offset: int = Query(0, ge=0), + db: Session = Depends(get_db), +): + query = db.query(EnrichedArticle) + + if date: + day = datetime.strptime(date, "%Y-%m-%d") + next_day = day + timedelta(days=1) + query = query.filter(EnrichedArticle.fetched_at >= day, EnrichedArticle.fetched_at < next_day) + if category: + query = query.filter(EnrichedArticle.category == category) + if tag: + # SQLite JSON 列使用 json_each 
做精确匹配,避免字符串子串误命中 + query = query.filter( + EnrichedArticle.tags.contains([tag]) + ) + if representative_only: + query = query.filter( + (EnrichedArticle.is_representative == True) | (EnrichedArticle.duplicate_group_id == None) + ) + + total = query.count() + items = query.order_by(EnrichedArticle.composite_score.desc()).offset(offset).limit(limit).all() + return {"items": items, "total": total} + + +@app.get("/api/articles/{article_id}", response_model=ArticleOut) +def get_article(article_id: int, db: Session = Depends(get_db)): + article = db.query(EnrichedArticle).filter(EnrichedArticle.id == article_id).first() + if not article: + raise HTTPException(status_code=404, detail="文章不存在") + return article + + +# ---------- 简报接口 ---------- + +@app.get("/api/briefs", response_model=List[BriefOut]) +def list_briefs( + limit: int = Query(30, ge=1, le=100), + db: Session = Depends(get_db), +): + return ( + db.query(DailyBrief) + .order_by(DailyBrief.brief_date.desc()) + .limit(limit) + .all() + ) + + +@app.get("/api/briefs/{date}", response_model=BriefOut) +def get_brief(date: str, db: Session = Depends(get_db)): + brief = db.query(DailyBrief).filter(DailyBrief.brief_date == date).first() + if not brief: + raise HTTPException(status_code=404, detail="简报不存在") + return brief + + +@app.post("/api/briefs/{date}/regenerate") +def regenerate_brief(date: str, db: Session = Depends(get_db), _=Depends(verify_token)): + try: + data = generate_daily_brief(db, date_str=date, force=True) + return {"message": "简报已重新生成", "data": data} + except Exception as exc: + logger.error("重新生成简报失败: %s", exc) + raise HTTPException(status_code=500, detail=str(exc)) + + +# ---------- 分类体系接口 ---------- + +@app.get("/api/taxonomy", response_model=List[TaxonomyOut]) +def get_taxonomy(kind: Optional[str] = Query(None), db: Session = Depends(get_db)): + return list_taxonomy(db, kind=kind) + + +@app.post("/api/taxonomy/bootstrap") +def trigger_taxonomy_bootstrap( + force: bool = False, + db: Session = 
Depends(get_db), + _=Depends(verify_token), +): + ok = bootstrap_taxonomy(db, force=force) + if not ok: + return {"message": "taxonomy 已存在或初始化失败,请检查日志"} + return {"message": "taxonomy 初始化成功"} + + +# ---------- 手动触发任务接口 ---------- + +@app.post("/api/tasks/summarize") +def task_summarize(db: Session = Depends(get_db), _=Depends(verify_token)): + stats = _run_task_locked(lambda session: fetch_and_summarize(session, hours=24, limit=200), db) + return {"message": "摘要任务完成", "stats": stats} + + +@app.post("/api/tasks/tag-score-dedup") +def task_tag_score_dedup(db: Session = Depends(get_db), _=Depends(verify_token)): + def _run(session): + tag_articles(session) + today = datetime.now(timezone.utc).strftime("%Y-%m-%d") + deduplicate_articles(session, date_str=today) + score_articles(session, update_duplication=True) + return None + _run_task_locked(_run, db) + return {"message": "分类/去重/打分任务完成"} + + +@app.post("/api/tasks/brief") +def task_brief(db: Session = Depends(get_db), _=Depends(verify_token)): + def _run(session): + today = datetime.now(timezone.utc).strftime("%Y-%m-%d") + return generate_daily_brief(session, date_str=today, force=True) + data = _run_task_locked(_run, db) + return {"message": "简报生成任务完成", "data": data} + + +# ---------- 配置管理接口 ---------- + +@app.get("/api/settings", response_model=List[SettingOut]) +def get_settings(db: Session = Depends(get_db), _=Depends(verify_token)): + return list_settings(db, mask_sensitive=True) + + +@app.put("/api/settings/{key}") +def update_setting( + key: str, + body: SettingUpdate, + db: Session = Depends(get_db), + _=Depends(verify_token), +): + ok = set_setting(db, key, body.value) + if not ok: + raise HTTPException(status_code=400, detail="无效的配置项") + return {"message": "配置已保存,重启服务后生效"} + + +@app.put("/api/settings") +def update_settings_batch( + body: BatchSettingsUpdate, + db: Session = Depends(get_db), + _=Depends(verify_token), +): + errors = [] + for key, value in body.settings.items(): + if not set_setting(db, key, 
value): + errors.append(key) + if errors: + raise HTTPException(status_code=400, detail=f"以下配置项无效: {', '.join(errors)}") + return {"message": "配置已保存,重启服务后生效"} + + +@app.post("/api/settings/reset") +def reset_all_settings(db: Session = Depends(get_db), _=Depends(verify_token)): + reset_settings(db) + return {"message": "配置已重置为环境变量默认值,重启服务后生效"} + + +# ---------- 仪表盘统计接口 ---------- + +@app.get("/api/stats", response_model=StatsOut) +def get_stats(db: Session = Depends(get_db)): + today = datetime.now(timezone.utc).strftime("%Y-%m-%d") + day_start = datetime.strptime(today, "%Y-%m-%d") + day_end = day_start + timedelta(days=1) + + total_articles = db.query(EnrichedArticle).count() + today_articles = ( + db.query(EnrichedArticle) + .filter(EnrichedArticle.fetched_at >= day_start, EnrichedArticle.fetched_at < day_end) + .count() + ) + ai_summarized = db.query(EnrichedArticle).filter(EnrichedArticle.ai_summary != "").count() + categories = db.query(Taxonomy).filter(Taxonomy.kind == "category").count() + tags = db.query(Taxonomy).filter(Taxonomy.kind == "tag").count() + duplicate_groups = db.query(DuplicateGroup).count() + briefs = db.query(DailyBrief).count() + + scheduler = get_scheduler() + next_jobs = {} + for job in scheduler.get_jobs(): + next_jobs[job.id] = job.next_run_time.isoformat() if job.next_run_time else None + + return { + "total_articles": total_articles, + "today_articles": today_articles, + "ai_summarized": ai_summarized, + "categories": categories, + "tags": tags, + "duplicate_groups": duplicate_groups, + "briefs": briefs, + "next_jobs": next_jobs, + } + + +# ---------- 静态文件托管(生产环境) ---------- + +static_dir = os.path.join(os.path.dirname(__file__), "static") +if not os.path.isdir(static_dir): + # 本地构建时 frontend/dist 也可作为静态文件源 + frontend_dist = os.path.join(os.path.dirname(__file__), "frontend", "dist") + if os.path.isdir(frontend_dist): + static_dir = frontend_dist + +if os.path.isdir(static_dir): + app.mount("/", StaticFiles(directory=static_dir, 
class EnrichedArticle(Base):
    """Processed article: stores the AI summary, category, tags, scores and de-duplication info."""

    __tablename__ = "articles_enriched"

    # Local surrogate primary key.
    id = Column(Integer, primary_key=True, index=True)
    # Unique and required; presumably the article id in the upstream RSSKeeper
    # service -- TODO confirm against the fetch code.
    rk_article_id = Column(Integer, unique=True, nullable=False, index=True)

    title = Column(String(1024), default="", index=True)
    link = Column(String(2048), default="", index=True)
    # NOT NULL with no default: every insert must supply a feed_id explicitly.
    feed_id = Column(Integer, nullable=False, index=True)
    feed_title = Column(String(512), default="")
    feed_category = Column(String(128), default="")
    author = Column(String(256), default="")

    published_at = Column(DateTime, nullable=True, index=True)
    # Defaults to the current UTC time at insert.
    # NOTE(review): _utc_now returns a tz-aware datetime while the column is
    # declared without timezone=True -- confirm how the backend round-trips it.
    fetched_at = Column(DateTime, default=_utc_now, index=True)

    original_summary = Column(Text, default="")
    content = Column(Text, default="")
    ai_summary = Column(Text, default="")

    category = Column(String(128), default="", index=True)
    # Callable default so each row gets its own fresh list.
    tags = Column(JSON, default=lambda: list())

    heat_score = Column(Float, default=0.0)
    importance_score = Column(Float, default=0.0)
    duplication_score = Column(Float, default=0.0)
    composite_score = Column(Float, default=0.0)

    # Optional link to a duplicate group; set to NULL when the group row is deleted.
    duplicate_group_id = Column(Integer, ForeignKey("duplicate_groups.id", ondelete="SET NULL"), nullable=True, index=True)
    is_representative = Column(Boolean, default=False, index=True)

    # 10-character string; presumably a "YYYY-MM-DD" date -- TODO confirm with the brief generator.
    brief_date = Column(String(10), default="", index=True)

    created_at = Column(DateTime, default=_utc_now)
    # Refreshed with the current UTC time on every UPDATE issued through the ORM.
    updated_at = Column(DateTime, default=_utc_now, onupdate=_utc_now)

    # Many-to-one side of DuplicateGroup.articles.
    duplicate_group = relationship("DuplicateGroup", back_populates="articles")
def _with_db(func):
    """Decorator: run a scheduled job with its own DB session under the global task lock.

    The wrapped job takes a single ``db`` session argument; the returned
    wrapper takes no arguments so it can be registered directly with the
    scheduler.

    Behaviour of the wrapper:
      * If the global task lock is already held, the run is skipped with a
        warning instead of blocking the scheduler thread.
      * Any exception raised while creating the session, running the job or
        closing the session is logged and swallowed.
      * The lock is always released once it has been acquired, even when
        session creation or ``db.close()`` fails; otherwise one failure
        would leave the lock held and silently skip every later job.
    """
    @functools.wraps(func)
    def wrapper():
        # Non-blocking acquire: a concurrent manual or scheduled task wins and this run is dropped.
        if not _task_lock.acquire(blocking=False):
            logger.warning("定时任务 %s 跳过:已有其他任务正在执行", func.__name__)
            return
        try:
            # Session creation is inside the guarded region so a failure here
            # cannot leak the lock.
            db = SessionLocal()
            try:
                func(db)
            finally:
                db.close()
        except Exception as exc:
            logger.error("定时任务 %s 执行失败: %s", func.__name__, exc, exc_info=True)
        finally:
            # Runs no matter what happened above, including a failing db.close().
            _task_lock.release()
    return wrapper
def stop_scheduler():
    """Stop the global scheduler, if one exists, and drop the module-level reference.

    Safe to call when no scheduler was ever created, and when one was
    created but never started (for example if ``init_scheduler`` failed
    before reaching ``scheduler.start()``).
    """
    global _scheduler
    if _scheduler is None:
        return
    # shutdown() raises SchedulerNotRunningError on a scheduler that was never
    # started, so only call it when the scheduler is actually running.
    if _scheduler.running:
        _scheduler.shutdown(wait=False)
    _scheduler = None
    logger.info("调度器已停止")
def test_deduplicate_articles(db):
    """Two near-identical articles fetched today form one duplicate group with one representative."""
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    day_start = datetime.strptime(today, "%Y-%m-%d")

    # feed_id is NOT NULL with no default on EnrichedArticle, so it must be
    # supplied or the commit below fails with an IntegrityError.
    a1 = EnrichedArticle(
        rk_article_id=1,
        feed_id=1,
        title="OpenAI 发布 GPT-5",
        content="OpenAI 今天发布了 GPT-5。",
        fetched_at=day_start,
    )
    a2 = EnrichedArticle(
        rk_article_id=2,
        feed_id=1,
        title="OpenAI 发布 GPT-5 性能提升",
        content="OpenAI 发布了 GPT-5,性能提升。",
        fetched_at=day_start + timedelta(minutes=10),
    )
    a3 = EnrichedArticle(
        rk_article_id=3,
        feed_id=1,
        title="苹果发布新款 iPhone",
        content="苹果发布了 iPhone。",
        fetched_at=day_start + timedelta(minutes=20),
    )

    db.add_all([a1, a2, a3])
    db.commit()

    stats = deduplicate_articles(db, date_str=today, title_threshold=0.85, content_threshold=0.80)

    assert stats["total"] == 3
    assert stats["duplicate_groups"] == 1
    assert stats["representatives"] == 1

    # Reload the rows so the assertions see what deduplicate_articles persisted.
    db.refresh(a1)
    db.refresh(a2)
    db.refresh(a3)

    representatives = [a for a in [a1, a2, a3] if a.is_representative]
    assert len(representatives) == 1
    assert representatives[0].duplicate_group_id is not None
def test_score_articles_integration(db):
    """score_articles writes positive heat, importance and composite scores for a matching article."""
    db.add_all([
        Taxonomy(name="AI", kind="heat_rule", keywords=["AI"], weight=1.5),
        Taxonomy(name="政策", kind="importance_rule", keywords=["政策"], weight=2.0),
    ])
    # feed_id is NOT NULL with no default on EnrichedArticle; without it the
    # commit below fails with an IntegrityError before scoring is exercised.
    article = EnrichedArticle(rk_article_id=1, feed_id=1, title="AI 新政策发布")
    db.add(article)
    db.commit()

    score_articles(db, article_ids=[article.id])

    assert article.heat_score > 0
    assert article.importance_score > 0
    assert article.composite_score > 0
def test_tag_articles_integration(db):
    """tag_articles assigns the matching category and tag to a stored, untagged article."""
    db.add_all([
        Taxonomy(name="科技", kind="category", keywords=["AI", "大模型"]),
        Taxonomy(name="人工智能", kind="tag", keywords=["AI", "大模型"]),
    ])
    # feed_id is NOT NULL with no default on EnrichedArticle; without it the
    # commit below fails with an IntegrityError before tagging is exercised.
    article = EnrichedArticle(rk_article_id=1, feed_id=1, title="OpenAI 发布 GPT-5 大模型")
    db.add(article)
    db.commit()

    count = tag_articles(db)
    assert count == 1
    assert article.category == "科技"
    assert "人工智能" in article.tags