rssKeeper Feeds

commit 54e7db0ef051279f5a772f7a731ec51715513bc9 Author: congsh Date: Thu Jun 11 14:03:36 2026 +0800 feat: init rssKeeper - RSS 抓取、管理与检索系统完整功能包括: - FastAPI 后端 + SQLite + FTS5 全文搜索 - RSS 源管理、自动发现、OPML 导入导出 - 文章抓取、去重、分类、全文检索 - RSS 源健康度监控 - Vue 3 + Element Plus 暗色主题 Web UI - 对外 REST API 供 AI 分析调用 - Docker + docker-compose 部署 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d0afded --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +ENV/ +env/ +.venv + +# Database +data/*.db +!data/.gitkeep + +# Node +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Frontend build (will be built in Docker) +frontend/dist/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Docker +.dockerignore diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ff737b7 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +# rssKeeper - 多阶段构建 +# Stage 1: 构建前端 +FROM node:20-alpine AS frontend-builder +WORKDIR /app/frontend +COPY frontend/package.json frontend/package-lock.json* ./ +RUN npm install +COPY frontend/ . +RUN npm run build + +# Stage 2: Python 后端 +FROM python:3.12-slim +WORKDIR /app + +# 安装系统依赖 +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libxml2-dev \ + libxslt1-dev \ + && rm -rf /var/lib/apt/lists/* + +# 安装 Python 依赖 +COPY backend/requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# 复制后端代码 +COPY backend/ . 
+ +# 复制前端构建产物 +COPY --from=frontend-builder /app/frontend/dist ./static + +# 创建数据目录 +RUN mkdir -p /app/data + +EXPOSE 8000 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..ce6897c --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +# rssKeeper + +RSS 抓取、管理与检索系统。支持 Docker 部署，包含 Web UI 和 REST API。 + +## 功能特性 + +- 📡 **RSS 源管理** — 添加、编辑、删除 RSS 源，支持自动发现 feed URL +- 📄 **文章管理** — 自动抓取、去重、分类，全文搜索 +- 🩺 **健康度监控** — 实时展示每个 RSS 源的成功率、最后更新、文章数量 +- 🔍 **全文检索** — 基于 SQLite FTS5 的全文搜索 +- 🐳 **Docker 部署** — 单容器部署，数据持久化 +- 🔌 **对外 API** — RESTful API 供 AI 或外部系统调用 + +## 快速开始 + +### Docker 部署（推荐） + +```bash +# 克隆项目 +git clone +cd rssKeeper + +# 启动 +docker-compose up -d --build + +# 访问 http://localhost:8000 +``` + +### 开发模式 + +```bash +# 后端 +cd backend +pip install -r requirements.txt +uvicorn main:app --reload --port 8000 + +# 前端（另开终端） +cd frontend +npm install +npm run dev +``` + +## 对外 API + +### 获取最近文章（供 AI 分析） + +```bash +# 获取最近 24 小时的文章 +curl "http://localhost:8000/api/v1/external/recent?hours=24&limit=50" + +# 指定 RSS 源 +curl "http://localhost:8000/api/v1/external/recent?feed_id=1&hours=48" + +# 指定分类 +curl "http://localhost:8000/api/v1/external/recent?category=科技&hours=24" +``` + +### 获取源列表 + +```bash +curl "http://localhost:8000/api/v1/external/feeds" +``` + +### 按源获取文章 + +```bash +curl "http://localhost:8000/api/v1/external/feeds/1/articles?limit=100" +``` + +### 获取每日摘要 + +```bash +curl "http://localhost:8000/api/v1/external/summary?date=2024-06-01" +``` + +## 技术栈 + +- **后端**: Python 3.12 + FastAPI + SQLAlchemy + APScheduler +- **数据库**: SQLite（FTS5 全文搜索） +- **前端**: Vue 3 + Element Plus + Vite +- **部署**: Docker + docker-compose diff --git a/backend/config.py b/backend/config.py new file mode 100644 index 0000000..ae9d203 --- /dev/null +++ b/backend/config.py @@ -0,0 +1,26 @@ +"""配置管理 - 环境变量 + 默认值""" +import os +from pathlib import Path + +# 项目根目录 +BASE_DIR = 
Path(__file__).parent +DATA_DIR = Path(os.getenv("DATA_DIR", "/app/data")) +DATA_DIR.mkdir(parents=True, exist_ok=True) + +# 数据库 +DATABASE_URL = os.getenv("DATABASE_URL", str(DATA_DIR / "rsskeeper.db")) + +# RSS 抓取配置 +FETCH_CONCURRENCY = int(os.getenv("FETCH_CONCURRENCY", "10")) +FETCH_TIMEOUT = int(os.getenv("FETCH_TIMEOUT", "30")) +DEFAULT_FETCH_INTERVAL = int(os.getenv("DEFAULT_FETCH_INTERVAL", "60")) # 分钟 +MIN_FETCH_INTERVAL = int(os.getenv("MIN_FETCH_INTERVAL", "15")) # 最小间隔15分钟 + +# 内容处理 +MAX_ARTICLE_CONTENT_LENGTH = int(os.getenv("MAX_ARTICLE_CONTENT_LENGTH", "50000")) +MAX_SUMMARY_LENGTH = int(os.getenv("MAX_SUMMARY_LENGTH", "500")) +ARTICLE_RETENTION_DAYS = int(os.getenv("ARTICLE_RETENTION_DAYS", "0")) # 0 = 永久保留 + +# API 配置 +API_PREFIX = "/api" +EXTERNAL_API_PREFIX = "/api/v1/external" diff --git a/backend/database.py b/backend/database.py new file mode 100644 index 0000000..efcc5bd --- /dev/null +++ b/backend/database.py @@ -0,0 +1,89 @@ +"""数据库连接与初始化""" +from sqlalchemy import create_engine, event +from sqlalchemy.orm import sessionmaker, declarative_base +from config import DATABASE_URL + +# SQLite 连接 +engine = create_engine( + f"sqlite:///{DATABASE_URL}", + connect_args={"check_same_thread": False}, + echo=False, +) + +# 启用 SQLite 外键约束 +@event.listens_for(engine, "connect") +def set_sqlite_pragma(dbapi_conn, connection_record): + cursor = dbapi_conn.cursor() + cursor.execute("PRAGMA foreign_keys=ON") + cursor.close() + +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) +Base = declarative_base() + + +def get_db(): + """FastAPI 依赖注入用""" + db = SessionLocal() + try: + yield db + finally: + db.close() + + +def init_db(): + """初始化数据库表""" + from models import Feed, Article, FetchLog # noqa + + Base.metadata.create_all(bind=engine) + init_fts5() + + +def init_fts5(): + """初始化 FTS5 全文搜索虚拟表""" + conn = engine.raw_connection() + cursor = conn.cursor() + + # 检查 FTS5 扩展是否可用 + try: + cursor.execute("SELECT 
sqlite_compileoption_used('ENABLE_FTS5')") + has_fts5 = cursor.fetchone()[0] + if not has_fts5: + print("警告: SQLite 未启用 FTS5 扩展，全文搜索将不可用") + return + except Exception: + pass + + # 创建 FTS5 虚拟表 + cursor.execute(""" + CREATE VIRTUAL TABLE IF NOT EXISTS articles_fts USING fts5( + title, content, + content='articles', + content_rowid='id' + ) + """) + + # 创建触发器，自动同步 articles 表到 FTS5 + cursor.execute(""" + CREATE TRIGGER IF NOT EXISTS articles_fts_insert AFTER INSERT ON articles BEGIN + INSERT INTO articles_fts(rowid, title, content) + VALUES (new.id, new.title, new.content); + END + """) + cursor.execute(""" + CREATE TRIGGER IF NOT EXISTS articles_fts_delete AFTER DELETE ON articles BEGIN + INSERT INTO articles_fts(articles_fts, rowid, title, content) + VALUES ('delete', old.id, old.title, old.content); + END + """) + cursor.execute(""" + CREATE TRIGGER IF NOT EXISTS articles_fts_update AFTER UPDATE ON articles BEGIN + INSERT INTO articles_fts(articles_fts, rowid, title, content) + VALUES ('delete', old.id, old.title, old.content); + INSERT INTO articles_fts(rowid, title, content) + VALUES (new.id, new.title, new.content); + END + """) + + conn.commit() + cursor.close() + conn.close() diff --git a/backend/fulltext_search.py b/backend/fulltext_search.py new file mode 100644 index 0000000..cff5c67 --- /dev/null +++ b/backend/fulltext_search.py @@ -0,0 +1,81 @@ +"""SQLite FTS5 全文搜索封装""" +from sqlalchemy import text +from database import engine + + +def search_articles(query: str, limit: int = 50, offset: int = 0): + """全文搜索文章 + 返回 [(article_id, title, content_snippet, rank), ...] 
+ """ + if not query or not query.strip(): + return [], 0 + + # 转义 FTS5 特殊字符 + query = query.replace('"', '""').strip() + + conn = engine.raw_connection() + cursor = conn.cursor() + + try: + # 使用 FTS5 查询 + sql = """ + SELECT a.id, a.title, a.summary, a.link, a.published_at, a.created_at, + f.id as feed_id, f.title as feed_title, f.category, + rank + FROM articles_fts + JOIN articles a ON articles_fts.rowid = a.id + JOIN feeds f ON a.feed_id = f.id + WHERE articles_fts MATCH ? + ORDER BY rank + LIMIT ? OFFSET ? + """ + cursor.execute(sql, (query, limit, offset)) + rows = cursor.fetchall() + + # 获取总数 + count_sql = """ + SELECT COUNT(*) FROM articles_fts WHERE articles_fts MATCH ? + """ + cursor.execute(count_sql, (query,)) + total = cursor.fetchone()[0] + + results = [] + for row in rows: + results.append({ + "id": row[0], + "title": row[1], + "summary": row[2], + "link": row[3], + "published_at": row[4], + "created_at": row[5], + "feed_id": row[6], + "feed_title": row[7], + "category": row[8], + }) + + return results, total + except Exception as e: + # FTS5 查询失败时返回空结果 + return [], 0 + finally: + cursor.close() + conn.close() + + +def rebuild_fts_index(): + """重建 FTS5 索引（数据不一致时使用）""" + conn = engine.raw_connection() + cursor = conn.cursor() + try: + cursor.execute("DELETE FROM articles_fts") + cursor.execute(""" + INSERT INTO articles_fts(rowid, title, content) + SELECT id, title, content FROM articles + """) + conn.commit() + return True + except Exception: + return False + finally: + cursor.close() + conn.close() diff --git a/backend/health_checker.py b/backend/health_checker.py new file mode 100644 index 0000000..a1f1f83 --- /dev/null +++ b/backend/health_checker.py @@ -0,0 +1,112 @@ +"""RSS 源健康度检测""" +from datetime import datetime, timedelta +from typing import List, Dict +from sqlalchemy.orm import Session +from models import Feed, FetchLog + + +def get_feed_health(db: Session, feed_id: int = None) -> List[Dict]: + """获取 RSS 源健康度信息 + 返回每个源的健康状态详情 + """ + query = 
db.query(Feed) + if feed_id: + query = query.filter(Feed.id == feed_id) + + feeds = query.all() + results = [] + + for feed in feeds: + total = feed.success_count + feed.fail_count + success_rate = round(feed.success_count / total * 100, 1) if total > 0 else 0 + + days_since_fetch = None + if feed.last_fetch_at: + days_since_fetch = (datetime.utcnow() - feed.last_fetch_at).days + + # 获取最近 7 天抓取记录 + recent_logs = db.query(FetchLog).filter( + FetchLog.feed_id == feed.id, + FetchLog.created_at >= datetime.utcnow() - timedelta(days=7) + ).order_by(FetchLog.created_at.desc()).limit(10).all() + + health = feed.health_status() + + results.append({ + "id": feed.id, + "title": feed.title or feed.url, + "url": feed.url, + "is_active": feed.is_active, + "health_status": health, + "health_label": _health_label(health), + "success_rate": success_rate, + "success_count": feed.success_count, + "fail_count": feed.fail_count, + "total_fetches": total, + "last_fetch_at": feed.last_fetch_at.isoformat() if feed.last_fetch_at else None, + "days_since_fetch": days_since_fetch, + "article_count": feed.article_count, + "last_error": feed.last_error, + "recent_logs": [ + { + "status": log.status, + "articles_fetched": log.articles_fetched, + "response_time_ms": log.response_time_ms, + "created_at": log.created_at.isoformat(), + "error_message": log.error_message if log.status == "fail" else None, + } + for log in recent_logs + ], + }) + + return results + + +def _health_label(status: str) -> str: + labels = { + "healthy": "健康", + "warning": "警告", + "unhealthy": "异常", + "unknown": "未知", + } + return labels.get(status, "未知") + + +def get_overall_stats(db: Session) -> Dict: + """获取整体统计信息""" + total_feeds = db.query(Feed).count() + active_feeds = db.query(Feed).filter(Feed.is_active == True).count() + total_articles = db.query(Feed).with_entities(Feed.article_count).all() + total_articles_count = sum(a[0] for a in total_articles) if total_articles else 0 + + # 健康源统计 + feeds = 
db.query(Feed).all() + healthy = warning = unhealthy = 0 + for feed in feeds: + status = feed.health_status() + if status == "healthy": + healthy += 1 + elif status == "warning": + warning += 1 + elif status == "unhealthy": + unhealthy += 1 + + # 今日抓取 + today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) + from models import FetchLog + today_fetches = db.query(FetchLog).filter(FetchLog.created_at >= today).count() + today_success = db.query(FetchLog).filter( + FetchLog.created_at >= today, FetchLog.status == "success" + ).count() + + return { + "total_feeds": total_feeds, + "active_feeds": active_feeds, + "total_articles": total_articles_count, + "healthy_feeds": healthy, + "warning_feeds": warning, + "unhealthy_feeds": unhealthy, + "today_fetches": today_fetches, + "today_success": today_success, + "today_success_rate": round(today_success / today_fetches * 100, 1) if today_fetches > 0 else 0, + } diff --git a/backend/main.py b/backend/main.py new file mode 100644 index 0000000..aa5f5a9 --- /dev/null +++ b/backend/main.py @@ -0,0 +1,75 @@ +"""rssKeeper - FastAPI 入口""" +import os +from contextlib import asynccontextmanager +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse +from starlette.middleware.cors import CORSMiddleware +from database import init_db, SessionLocal +from scheduler import init_feed_jobs, stop_scheduler +from routers import feeds, articles, dashboard, external_api +import config + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """应用生命周期管理""" + # 启动时：初始化数据库 + 注册定时任务 + init_db() + db = SessionLocal() + try: + init_feed_jobs(db) + finally: + db.close() + + yield + + # 关闭时：停止调度器 + stop_scheduler() + + +app = FastAPI( + title="rssKeeper", + description="RSS 抓取、管理与检索系统", + version="1.0.0", + lifespan=lifespan, +) + +# CORS +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + 
allow_headers=["*"], +) + +# API 路由 +app.include_router(feeds.router, prefix=config.API_PREFIX) +app.include_router(articles.router, prefix=config.API_PREFIX) +app.include_router(dashboard.router, prefix=config.API_PREFIX) +app.include_router(external_api.router, prefix=config.EXTERNAL_API_PREFIX) + + +@app.get("/api/health") +def health_check(): + """健康检查""" + return {"status": "ok", "service": "rssKeeper"} + + +# 静态文件服务（前端构建产物） +static_dir = os.path.join(config.BASE_DIR, "static") +if os.path.exists(static_dir): + app.mount("/static", StaticFiles(directory=static_dir), name="static") + + @app.get("/{full_path:path}") + async def serve_spa(full_path: str): + """Vue SPA 路由回退""" + # API 路由不走这里 + if full_path.startswith("api/") or full_path.startswith("docs") or full_path.startswith("openapi.json"): + return {"detail": "Not found"} + + index_path = os.path.join(static_dir, "index.html") + if os.path.exists(index_path): + return FileResponse(index_path) + return {"detail": "Frontend not built"} diff --git a/backend/models.py b/backend/models.py new file mode 100644 index 0000000..c4dd520 --- /dev/null +++ b/backend/models.py @@ -0,0 +1,90 @@ +"""SQLAlchemy 数据模型""" +from datetime import datetime +from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, ForeignKey +from sqlalchemy.orm import relationship +from database import Base + + +class Feed(Base): + """RSS 源""" + __tablename__ = "feeds" + + id = Column(Integer, primary_key=True, index=True) + url = Column(String(2048), unique=True, nullable=False, index=True) + title = Column(String(512), default="") + description = Column(Text, default="") + category = Column(String(128), default="") + is_active = Column(Boolean, default=True, index=True) + fetch_interval_minutes = Column(Integer, default=60) + + # 抓取统计 + last_fetch_at = Column(DateTime, nullable=True) + last_fetch_status = Column(String(20), default="") + last_error = Column(Text, default="") + success_count = Column(Integer, default=0) + 
fail_count = Column(Integer, default=0) + article_count = Column(Integer, default=0) + + created_at = Column(DateTime, default=datetime.utcnow) + + # 关联 + articles = relationship("Article", back_populates="feed", cascade="all, delete-orphan") + fetch_logs = relationship("FetchLog", back_populates="feed", cascade="all, delete-orphan") + + def health_status(self): + """计算健康度 + 🟢 健康: 成功率 >= 90%, 最近7天有更新 + 🟡 警告: 成功率 50%-90%, 或超过3天未更新 + 🔴 异常: 成功率 < 50%, 或超过7天未更新 + """ + total = self.success_count + self.fail_count + if total == 0: + return "unknown" + + success_rate = self.success_count / total + + days_since_last_fetch = None + if self.last_fetch_at: + days_since_last_fetch = (datetime.utcnow() - self.last_fetch_at).days + + if success_rate >= 0.9 and (days_since_last_fetch is None or days_since_last_fetch <= 7): + return "healthy" + elif success_rate >= 0.5 and (days_since_last_fetch is None or days_since_last_fetch <= 7): + return "warning" + else: + return "unhealthy" + + +class Article(Base): + """RSS 文章""" + __tablename__ = "articles" + + id = Column(Integer, primary_key=True, index=True) + feed_id = Column(Integer, ForeignKey("feeds.id", ondelete="CASCADE"), nullable=False, index=True) + title = Column(String(1024), default="", index=True) + link = Column(String(2048), unique=True, nullable=False, index=True) + author = Column(String(256), default="") + published_at = Column(DateTime, nullable=True, index=True) + content = Column(Text, default="") + summary = Column(Text, default="") + is_read = Column(Boolean, default=False) + created_at = Column(DateTime, default=datetime.utcnow, index=True) + + # 关联 + feed = relationship("Feed", back_populates="articles") + + +class FetchLog(Base): + """抓取日志""" + __tablename__ = "fetch_logs" + + id = Column(Integer, primary_key=True, index=True) + feed_id = Column(Integer, ForeignKey("feeds.id", ondelete="CASCADE"), nullable=False, index=True) + status = Column(String(20), nullable=False) # success / fail + articles_fetched = 
Column(Integer, default=0) + error_message = Column(Text, default="") + response_time_ms = Column(Integer, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, index=True) + + # 关联 + feed = relationship("Feed", back_populates="fetch_logs") diff --git a/backend/requirements.txt b/backend/requirements.txt new file mode 100644 index 0000000..c4e228b --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,9 @@ +fastapi>=0.110.0 +uvicorn[standard]>=0.29.0 +sqlalchemy>=2.0.0 +pydantic>=2.6.0 +feedparser>=6.0.11 +requests>=2.31.0 +beautifulsoup4>=4.12.0 +apscheduler>=3.10.4 +lxml>=5.1.0 diff --git a/backend/routers/articles.py b/backend/routers/articles.py new file mode 100644 index 0000000..1f3a52e --- /dev/null +++ b/backend/routers/articles.py @@ -0,0 +1,133 @@ +"""文章管理 API""" +from typing import Optional +from fastapi import APIRouter, Depends +from pydantic import BaseModel +from sqlalchemy.orm import Session +from sqlalchemy import desc +from database import get_db +from models import Article, Feed +from fulltext_search import search_articles + +router = APIRouter(prefix="/articles", tags=["articles"]) + + +class ArticleOut(BaseModel): + id: int + feed_id: int + title: str + link: str + author: str + published_at: Optional[str] + summary: str + is_read: bool + created_at: str + feed_title: str + category: str + + class Config: + from_attributes = True + + +@router.get("") +def list_articles( + skip: int = 0, + limit: int = 50, + feed_id: Optional[int] = None, + category: Optional[str] = None, + search: Optional[str] = None, + since: Optional[str] = None, + until: Optional[str] = None, + is_read: Optional[bool] = None, + db: Session = Depends(get_db), +): + """获取文章列表，支持多种筛选条件""" + + # 如果有搜索关键词，使用 FTS5 全文搜索 + if search and search.strip(): + results, total = search_articles(search.strip(), limit=limit, offset=skip) + return {"total": total, "items": results} + + query = db.query(Article, Feed.title.label("feed_title"), 
Feed.category.label("category")).join(Feed) + + if feed_id: + query = query.filter(Article.feed_id == feed_id) + if category: + query = query.filter(Feed.category == category) + if is_read is not None: + query = query.filter(Article.is_read == is_read) + if since: + query = query.filter(Article.published_at >= since) + if until: + query = query.filter(Article.published_at <= until) + + total = query.count() + rows = query.order_by(desc(Article.published_at)).offset(skip).limit(limit).all() + + items = [] + for article, feed_title, category in rows: + items.append({ + "id": article.id, + "feed_id": article.feed_id, + "title": article.title or "", + "link": article.link, + "author": article.author or "", + "published_at": article.published_at.isoformat() if article.published_at else None, + "summary": article.summary or "", + "is_read": article.is_read, + "created_at": article.created_at.isoformat(), + "feed_title": feed_title or "", + "category": category or "", + }) + + return {"total": total, "items": items} + + +@router.get("/{article_id}") +def get_article(article_id: int, db: Session = Depends(get_db)): + """获取文章详情""" + article = db.query(Article).filter(Article.id == article_id).first() + if not article: + raise HTTPException(status_code=404, detail="文章不存在") + + feed = db.query(Feed).filter(Feed.id == article.feed_id).first() + + return { + "id": article.id, + "feed_id": article.feed_id, + "title": article.title or "", + "link": article.link, + "author": article.author or "", + "published_at": article.published_at.isoformat() if article.published_at else None, + "content": article.content or "", + "summary": article.summary or "", + "is_read": article.is_read, + "created_at": article.created_at.isoformat(), + "feed_title": feed.title if feed else "", + "category": feed.category if feed else "", + } + + +@router.put("/{article_id}/read") +def mark_read(article_id: int, db: Session = Depends(get_db)): + """标记文章为已读""" + article = 
db.query(Article).filter(Article.id == article_id).first() + if not article: + raise HTTPException(status_code=404, detail="文章不存在") + + article.is_read = True + db.commit() + return {"message": "已标记为已读"} + + +@router.get("/search/fulltext") +def fulltext_search( + q: str, + skip: int = 0, + limit: int = 50, +): + """全文搜索文章""" + results, total = search_articles(q, limit=limit, offset=skip) + return {"total": total, "items": results} + + +from fastapi import HTTPException diff --git a/backend/routers/dashboard.py b/backend/routers/dashboard.py new file mode 100644 index 0000000..206f910 --- /dev/null +++ b/backend/routers/dashboard.py @@ -0,0 +1,58 @@ +"""仪表盘统计 API""" +from fastapi import APIRouter, Depends +from sqlalchemy.orm import Session +from database import get_db +from health_checker import get_overall_stats, get_feed_health + +router = APIRouter(prefix="/dashboard", tags=["dashboard"]) + + +@router.get("/stats") +def dashboard_stats(db: Session = Depends(get_db)): + """仪表盘统计数据""" + return get_overall_stats(db) + + +@router.get("/health") +def dashboard_health( + skip: int = 0, + limit: int = 100, + db: Session = Depends(get_db), +): + """RSS 源健康度列表""" + all_health = get_feed_health(db) + total = len(all_health) + + # 按健康状态排序：异常在前 + status_order = {"unhealthy": 0, "warning": 1, "unknown": 2, "healthy": 3} + all_health.sort(key=lambda x: status_order.get(x["health_status"], 2)) + + items = all_health[skip:skip + limit] + return {"total": total, "items": items} + + +@router.get("/recent-activity") +def recent_activity(limit: int = 20, db: Session = Depends(get_db)): + """最近的抓取活动""" + from models import FetchLog, Feed + from sqlalchemy import desc + + logs = db.query(FetchLog, Feed.title.label("feed_title")).join(Feed).order_by( + desc(FetchLog.created_at) + ).limit(limit).all() + + return { + "items": [ + { + "id": log.id, + "feed_id": log.feed_id, + "feed_title": feed_title or "", + "status": log.status, + "articles_fetched": log.articles_fetched, + 
"response_time_ms": log.response_time_ms, + "error_message": log.error_message, + "created_at": log.created_at.isoformat(), + } + for log, feed_title in logs + ] + } diff --git a/backend/routers/external_api.py b/backend/routers/external_api.py new file mode 100644 index 0000000..b849619 --- /dev/null +++ b/backend/routers/external_api.py @@ -0,0 +1,163 @@ +"""对外 API（供 AI/外部系统调用）""" +from typing import Optional +from datetime import datetime, timedelta +from fastapi import APIRouter, Depends +from sqlalchemy.orm import Session +from sqlalchemy import desc +from database import get_db +from models import Article, Feed + +router = APIRouter(prefix="/external", tags=["external"]) + + +@router.get("/recent") +def get_recent_articles( + hours: int = 24, + limit: int = 50, + feed_id: Optional[int] = None, + category: Optional[str] = None, + db: Session = Depends(get_db), +): + """获取最近 N 小时的文章 + 这是对外提供给 AI 分析的主要接口 + """ + since = datetime.utcnow() - timedelta(hours=hours) + + query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed) + + query = query.filter(Article.created_at >= since) + + if feed_id: + query = query.filter(Article.feed_id == feed_id) + if category: + query = query.filter(Feed.category == category) + + rows = query.order_by(desc(Article.published_at)).limit(limit).all() + + return { + "query": { + "hours": hours, + "limit": limit, + "feed_id": feed_id, + "category": category, + }, + "count": len(rows), + "articles": [ + { + "id": article.id, + "title": article.title or "", + "link": article.link, + "author": article.author or "", + "summary": article.summary or "", + "content": article.content or "" if len(article.content or "") < 10000 else article.summary or "", + "published_at": article.published_at.isoformat() if article.published_at else None, + "created_at": article.created_at.isoformat(), + "feed_title": feed_title or "", + "category": category or "", + } + for article, feed_title, category in rows + ], + 
} + + +@router.get("/feeds") +def get_active_feeds(db: Session = Depends(get_db)): + """获取所有活跃的 RSS 源列表""" + feeds = db.query(Feed).filter(Feed.is_active == True).all() + + return { + "count": len(feeds), + "feeds": [ + { + "id": feed.id, + "title": feed.title or feed.url, + "url": feed.url, + "category": feed.category or "", + "article_count": feed.article_count, + "last_fetch_at": feed.last_fetch_at.isoformat() if feed.last_fetch_at else None, + } + for feed in feeds + ], + } + + +@router.get("/feeds/{feed_id}/articles") +def get_feed_articles( + feed_id: int, + limit: int = 100, + since: Optional[str] = None, + db: Session = Depends(get_db), +): + """获取指定 RSS 源的文章""" + feed = db.query(Feed).filter(Feed.id == feed_id).first() + if not feed: + return {"error": "Feed not found"} + + query = db.query(Article).filter(Article.feed_id == feed_id) + + if since: + query = query.filter(Article.published_at >= since) + + articles = query.order_by(desc(Article.published_at)).limit(limit).all() + + return { + "feed": { + "id": feed.id, + "title": feed.title or feed.url, + "url": feed.url, + }, + "count": len(articles), + "articles": [ + { + "id": article.id, + "title": article.title or "", + "link": article.link, + "author": article.author or "", + "summary": article.summary or "", + "published_at": article.published_at.isoformat() if article.published_at else None, + } + for article in articles + ], + } + + +@router.get("/summary") +def get_daily_summary( + date: Optional[str] = None, + db: Session = Depends(get_db), +): + """获取指定日期的文章摘要统计 + 供 AI 快速了解某天的 RSS 内容概况 + """ + if date: + try: + day = datetime.strptime(date, "%Y-%m-%d") + next_day = day + timedelta(days=1) + except ValueError: + return {"error": "Invalid date format, use YYYY-MM-DD"} + else: + day = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) + next_day = day + timedelta(days=1) + + query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed) + 
query = query.filter(Article.created_at >= day, Article.created_at < next_day) + rows = query.order_by(desc(Article.published_at)).all() + + # 按分类统计 + by_category = {} + for article, feed_title, category in rows: + cat = category or "未分类" + if cat not in by_category: + by_category[cat] = [] + by_category[cat].append({ + "title": article.title or "", + "link": article.link, + "feed": feed_title or "", + "summary": article.summary or "", + }) + + return { + "date": day.strftime("%Y-%m-%d"), + "total_articles": len(rows), + "by_category": by_category, + } diff --git a/backend/routers/feeds.py b/backend/routers/feeds.py new file mode 100644 index 0000000..41aa704 --- /dev/null +++ b/backend/routers/feeds.py @@ -0,0 +1,273 @@ +"""RSS 源管理 API""" +from typing import List, Optional +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, HttpUrl +from sqlalchemy.orm import Session +from database import get_db +from models import Feed +from rss_fetcher import discover_feed_url, fetch_and_store_feed +from scheduler import add_feed_job, remove_feed_job + +router = APIRouter(prefix="/feeds", tags=["feeds"]) + + +class FeedCreate(BaseModel): + url: str + title: Optional[str] = "" + description: Optional[str] = "" + category: Optional[str] = "" + is_active: Optional[bool] = True + fetch_interval_minutes: Optional[int] = 60 + + +class FeedUpdate(BaseModel): + title: Optional[str] = None + description: Optional[str] = None + category: Optional[str] = None + is_active: Optional[bool] = None + fetch_interval_minutes: Optional[int] = None + + +class FeedOut(BaseModel): + id: int + url: str + title: str + description: str + category: str + is_active: bool + fetch_interval_minutes: int + last_fetch_at: Optional[str] = None + last_fetch_status: str + success_count: int + fail_count: int + article_count: int + health_status: str + created_at: str + + class Config: + from_attributes = True + + +@router.get("", response_model=dict) +def list_feeds( + skip: 
@router.post("", response_model=dict)
def create_feed(data: FeedCreate, db: Session = Depends(get_db)):
    """Register a new RSS source, schedule it and run the first fetch.

    Args:
        data: Validated request payload describing the feed.
        db: Database session injected by FastAPI.

    Returns:
        A dict with the new feed ``id``, a confirmation ``message``, the
        stored ``url`` and ``first_fetch`` (the result dict of the initial
        fetch, so the caller can see whether the source is reachable).

    Raises:
        HTTPException: 400 when the URL is not an absolute http(s) URL,
            409 when a feed with the same URL already exists.
    """
    from urllib.parse import urlparse

    # Normalise before the duplicate check so " https://x/feed " and
    # "https://x/feed" cannot be stored as two different sources.
    url = data.url.strip()
    parts = urlparse(url)
    if parts.scheme not in ("http", "https") or not parts.netloc:
        raise HTTPException(status_code=400, detail="无效的 RSS 源 URL")

    existing = db.query(Feed).filter(Feed.url == url).first()
    if existing:
        raise HTTPException(status_code=409, detail="该 RSS 源已存在")

    feed = Feed(
        url=url,
        title=data.title or "",
        description=data.description or "",
        category=data.category or "",
        # An explicit JSON null must not be persisted as "no value".
        is_active=True if data.is_active is None else data.is_active,
        fetch_interval_minutes=data.fetch_interval_minutes or 60,
    )
    db.add(feed)
    db.commit()
    db.refresh(feed)

    # Only active feeds get a recurring job.
    if feed.is_active:
        add_feed_job(feed.id, feed.fetch_interval_minutes)

    # Fetch once right away so the new source shows content immediately.
    first_fetch = fetch_and_store_feed(feed.id)

    return {
        "id": feed.id,
        "message": "RSS 源添加成功",
        "url": feed.url,
        "first_fetch": first_fetch,
    }
class OpmlImportRequest(BaseModel):
    """JSON request body for the OPML import endpoint."""

    # Raw OPML document text.
    opml_content: str


@router.post("/import-opml")
def import_opml(
    opml_content: Optional[str] = None,
    db: Session = Depends(get_db),
    payload: Optional[OpmlImportRequest] = None,
):
    """Import feeds from an OPML document.

    The document may be supplied either as a JSON body
    (``{"opml_content": "..."}``) or, as before, through the
    ``opml_content`` query parameter. The JSON body wins when both are
    present.

    Args:
        opml_content: OPML text passed as a query parameter.
        db: Database session injected by FastAPI.
        payload: OPML text passed as a JSON body.

    Returns:
        A dict with the number of feeds ``added`` and ``skipped`` (already
        known URLs) plus a human-readable ``message``.

    Raises:
        HTTPException: 400 when no document was supplied or it is not
            well-formed XML.
    """
    import xml.etree.ElementTree as ET

    document = payload.opml_content if payload is not None else opml_content
    if not document or not document.strip():
        raise HTTPException(status_code=400, detail="无效的 OPML 文件")

    # NOTE(security): xml.etree.ElementTree does not defend against
    # entity-expansion attacks; this endpoint parses caller-supplied XML.
    try:
        root = ET.fromstring(document)
    except ET.ParseError:
        raise HTTPException(status_code=400, detail="无效的 OPML 文件")

    # Map every outline to its enclosing outline so that a folder's name
    # can serve as the category of the feeds nested inside it.
    parent_of = {child: parent for parent in root.iter("outline") for child in parent}

    added = 0
    skipped = 0

    for outline in root.iter("outline"):
        url = (outline.get("xmlUrl") or outline.get("xmlurl") or "").strip()
        if not url:
            # Folder outlines carry no feed URL.
            continue

        existing = db.query(Feed).filter(Feed.url == url).first()
        if existing:
            skipped += 1
            continue

        category = outline.get("category", "")
        if not category:
            folder = parent_of.get(outline)
            if folder is not None and not (folder.get("xmlUrl") or folder.get("xmlurl")):
                category = folder.get("title") or folder.get("text") or ""

        feed = Feed(
            url=url,
            title=outline.get("title", "") or outline.get("text", ""),
            description=outline.get("description", ""),
            category=category,
            is_active=True,
            fetch_interval_minutes=60,
        )
        db.add(feed)
        # Commit per feed: the id is needed for the job, and a URL repeated
        # later in the same document is then found by the duplicate check.
        db.commit()
        db.refresh(feed)

        add_feed_job(feed.id, feed.fetch_interval_minutes)
        added += 1

    return {"added": added, "skipped": skipped, "message": f"成功导入 {added} 个 RSS 源"}
def discover_feed_url(url: str, timeout: int = 15) -> list:
    """Discover RSS/Atom feed URLs advertised by, or conventionally served next to, a web page.

    Two sources are combined: ``<link rel="alternate">`` tags in the page
    and a fixed list of common feed paths, each probed with a HEAD request.

    Args:
        url: Address of the page to inspect.
        timeout: Timeout in seconds for fetching the page itself (probes
            use a fixed 5 second timeout).

    Returns:
        Feed URLs in discovery order without duplicates; an empty list when
        the page cannot be fetched.
    """
    headers = {
        "User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)",
    }
    common_patterns = [
        "/rss", "/feed", "/feeds", "/atom.xml", "/rss.xml",
        "/index.xml", "/feed.xml", "/?feed=rss2",
    ]

    def is_feed_type(value: str) -> bool:
        # "application/xhtml+xml" contains "xml" but is a page, not a feed.
        value = value.lower()
        return "html" not in value and any(t in value for t in ("rss", "atom", "xml"))

    try:
        response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    feed_urls = []

    # Feeds the page declares itself.
    for link in soup.find_all("link", rel="alternate"):
        href = link.get("href", "")
        if href and is_feed_type(link.get("type", "")):
            feed_urls.append(urljoin(response.url, href))

    # Well-known feed locations relative to the final (post-redirect) URL.
    for pattern in common_patterns:
        candidate = urljoin(response.url, pattern)
        if candidate in feed_urls:
            continue
        try:
            probe = requests.head(candidate, headers=headers, timeout=5, allow_redirects=True)
        except requests.exceptions.RequestException:
            # An unreachable candidate is simply not a feed.
            continue
        # Error responses are frequently served as XML too; only a
        # successful status may count as a hit.
        if probe.ok and is_feed_type(probe.headers.get("Content-Type", "")):
            feed_urls.append(candidate)

    return list(dict.fromkeys(feed_urls))  # de-duplicate, keep order
def generate_summary(content: str, max_length: int = 300) -> str:
    """Build a short plain-text summary from article content.

    Whitespace runs are collapsed to single spaces. Text that already fits
    is returned unchanged; longer text is cut at the last sentence ending
    in the second half of the window, or otherwise hard-truncated with a
    trailing ``...``.

    Args:
        content: Cleaned article text; ``None`` or empty yields ``""``.
        max_length: Upper bound for the length of the returned summary.

    Returns:
        A summary that is never longer than ``max_length`` characters.
    """
    if not content or max_length <= 0:
        return ""

    text = re.sub(r"\s+", " ", content).strip()
    if len(text) <= max_length:
        return text

    truncated = text[:max_length]
    # Two-character endings need one extra character of look-ahead so a
    # sentence that ends exactly at the cut is still recognised.
    window = text[: max_length + 1]
    cut = max(
        max(truncated.rfind(mark) for mark in ("。", "！", "？")),
        max(window.rfind(mark) for mark in (". ", "! ", "? ")),
    )
    # Accept the boundary only if it keeps more than half of the window.
    if cut > max_length * 0.5:
        return truncated[: cut + 1]

    ellipsis = "..."
    if max_length <= len(ellipsis):
        return truncated
    # Reserve room for the ellipsis so the bound holds.
    return truncated[: max_length - len(ellipsis)].rstrip() + ellipsis
def _record_fetch_failure(db, feed, error, response_time_ms=None):
    """Persist one failed fetch attempt on the feed row and in the fetch log."""
    feed.last_fetch_at = datetime.utcnow()
    feed.last_fetch_status = "fail"
    feed.last_error = error
    feed.fail_count += 1
    db.add(
        FetchLog(
            feed_id=feed.id,
            status="fail",
            error_message=error,
            response_time_ms=response_time_ms,
        )
    )
    db.commit()


def fetch_and_store_feed(feed_id: int) -> dict:
    """Fetch one RSS source and store its articles.

    Opens its own database session, so it can be called from request
    handlers and from scheduler jobs alike.

    Args:
        feed_id: Primary key of the feed to fetch.

    Returns:
        ``{"success": True, "articles_count": <new articles>,
        "feed_title": ...}`` on success, otherwise
        ``{"success": False, "error": ..., "articles_count": 0}``.
        Exceptions are never propagated to the caller.
    """
    import logging

    logger = logging.getLogger(__name__)
    db = SessionLocal()
    try:
        feed = db.query(Feed).filter(Feed.id == feed_id).first()
        if not feed:
            return {"success": False, "error": "Feed not found", "articles_count": 0}

        result = fetch_feed(feed.url)
        if not result["success"]:
            _record_fetch_failure(db, feed, result["error"], result.get("response_time_ms"))
            return {"success": False, "error": result["error"], "articles_count": 0}

        parsed = result["feed_data"]

        # Fill metadata from the source only while it is still empty, so a
        # title or description edited through the API is not overwritten
        # by the next fetch.
        source_title = getattr(parsed.feed, "title", None)
        if source_title and not feed.title:
            feed.title = source_title[:512]
        source_description = getattr(parsed.feed, "description", None)
        if source_description and not feed.description:
            feed.description = source_description[:1000]

        new_count = 0
        seen_links = set()
        for entry in parsed.entries:
            article_data = parse_article(entry, feed_id)
            link = article_data["link"]
            # Skip entries without a link and links repeated in this batch;
            # the lookup below may not see rows that are still pending.
            if not link or link in seen_links:
                continue
            seen_links.add(link)

            existing = db.query(Article).filter(Article.link == link).first()
            if existing:
                # Refresh the stored article, keeping old values where the
                # new entry has nothing to offer.
                existing.title = article_data["title"] or existing.title
                existing.content = article_data["content"] or existing.content
                existing.summary = article_data["summary"] or existing.summary
                existing.author = article_data["author"] or existing.author
                if article_data["published_at"]:
                    existing.published_at = article_data["published_at"]
            else:
                db.add(Article(**article_data))
                new_count += 1

        # Make pending inserts visible to the count query regardless of the
        # session's autoflush setting.
        db.flush()

        feed.last_fetch_at = datetime.utcnow()
        feed.last_fetch_status = "success"
        feed.last_error = ""
        feed.success_count += 1
        feed.article_count = db.query(Article).filter(Article.feed_id == feed_id).count()

        db.add(
            FetchLog(
                feed_id=feed_id,
                status="success",
                articles_fetched=new_count,
                response_time_ms=result.get("response_time_ms"),
            )
        )
        db.commit()

        return {
            "success": True,
            "articles_count": new_count,
            "feed_title": feed.title,
        }
    except Exception as e:
        db.rollback()
        logger.exception("Fetching feed %s failed", feed_id)
        # Best effort: record the failure so it shows up in the feed's
        # status and counters; never let this mask the original error.
        try:
            failed_feed = db.query(Feed).filter(Feed.id == feed_id).first()
            if failed_feed is not None:
                _record_fetch_failure(db, failed_feed, str(e))
        except Exception:
            db.rollback()
            logger.exception("Could not record fetch failure for feed %s", feed_id)
        return {"success": False, "error": str(e), "articles_count": 0}
    finally:
        db.close()
def remove_feed_job(feed_id: int):
    """Remove the recurring fetch job of one RSS source.

    Removing a job that does not exist is a no-op; any other scheduler
    error is propagated instead of being silently discarded.

    Args:
        feed_id: Primary key of the feed whose job should be removed.
    """
    from apscheduler.jobstores.base import JobLookupError

    scheduler = get_scheduler()
    job_id = f"fetch_feed_{feed_id}"
    try:
        scheduler.remove_job(job_id)
    except JobLookupError:
        # The feed was inactive or its job was already removed.
        pass

+ + + diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..462330c --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,22 @@ +{ + "name": "rsskeeper-frontend", + "private": true, + "version": "1.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview" + }, + "dependencies": { + "vue": "^3.4.21", + "vue-router": "^4.3.0", + "element-plus": "^2.6.3", + "axios": "^1.6.8", + "@element-plus/icons-vue": "^2.3.1" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^5.0.4", + "vite": "^5.2.0" + } +} diff --git a/frontend/src/App.vue b/frontend/src/App.vue new file mode 100644 index 0000000..3fad404 --- /dev/null +++ b/frontend/src/App.vue @@ -0,0 +1,176 @@ + + + + + diff --git a/frontend/src/api/index.js b/frontend/src/api/index.js new file mode 100644 index 0000000..2a64869 --- /dev/null +++ b/frontend/src/api/index.js @@ -0,0 +1,63 @@ +import axios from 'axios' + +const api = axios.create({ + baseURL: '', + timeout: 30000, + headers: { + 'Content-Type': 'application/json', + }, +}) + +// 请求拦截 +api.interceptors.request.use( + (config) => config, + (error) => Promise.reject(error) +) + +// 响应拦截 +api.interceptors.response.use( + (response) => response.data, + (error) => { + const msg = error.response?.data?.detail || error.message || '请求失败' + return Promise.reject(new Error(msg)) + } +) + +// RSS 源管理 +export const feedsApi = { + list: (params = {}) => api.get('/api/feeds', { params }), + categories: () => api.get('/api/feeds/categories'), + get: (id) => api.get(`/api/feeds/${id}`), + create: (data) => api.post('/api/feeds', data), + update: (id, data) => api.put(`/api/feeds/${id}`, data), + remove: (id) => api.delete(`/api/feeds/${id}`), + fetch: (id) => api.post(`/api/feeds/${id}/fetch`), + discover: (url) => api.post('/api/feeds/discover', null, { params: { url } }), + importOpml: (content) => api.post('/api/feeds/import-opml', { opml_content: content }), + exportOpml: () => 
api.get('/api/feeds/export-opml'), +} + +// 文章管理 +export const articlesApi = { + list: (params = {}) => api.get('/api/articles', { params }), + get: (id) => api.get(`/api/articles/${id}`), + search: (q) => api.get('/api/articles/search/fulltext', { params: { q } }), + markRead: (id) => api.put(`/api/articles/${id}/read`), +} + +// 仪表盘 +export const dashboardApi = { + stats: () => api.get('/api/dashboard/stats'), + health: (params = {}) => api.get('/api/dashboard/health', { params }), + recentActivity: () => api.get('/api/dashboard/recent-activity'), +} + +// 对外 API +export const externalApi = { + recent: (params = {}) => api.get('/api/v1/external/recent', { params }), + feeds: () => api.get('/api/v1/external/feeds'), + feedArticles: (id, params = {}) => api.get(`/api/v1/external/feeds/${id}/articles`, { params }), + summary: (date) => api.get('/api/v1/external/summary', { params: { date } }), +} + +export default api diff --git a/frontend/src/main.js b/frontend/src/main.js new file mode 100644 index 0000000..4857074 --- /dev/null +++ b/frontend/src/main.js @@ -0,0 +1,35 @@ +import { createApp } from 'vue' +import ElementPlus from 'element-plus' +import 'element-plus/dist/index.css' +import * as ElementPlusIconsVue from '@element-plus/icons-vue' +import { createRouter, createWebHistory } from 'vue-router' +import App from './App.vue' + +// 页面组件 +import Dashboard from './views/Dashboard.vue' +import Feeds from './views/Feeds.vue' +import Articles from './views/Articles.vue' +import ArticleDetail from './views/ArticleDetail.vue' + +const routes = [ + { path: '/', component: Dashboard, name: 'Dashboard' }, + { path: '/feeds', component: Feeds, name: 'Feeds' }, + { path: '/articles', component: Articles, name: 'Articles' }, + { path: '/articles/:id', component: ArticleDetail, name: 'ArticleDetail' }, +] + +const router = createRouter({ + history: createWebHistory(), + routes, +}) + +const app = createApp(App) + +// 注册所有图标 +for (const [key, component] of 
Object.entries(ElementPlusIconsVue)) { + app.component(key, component) +} + +app.use(ElementPlus) +app.use(router) +app.mount('#app') diff --git a/frontend/src/views/ArticleDetail.vue b/frontend/src/views/ArticleDetail.vue new file mode 100644 index 0000000..fdd80b2 --- /dev/null +++ b/frontend/src/views/ArticleDetail.vue @@ -0,0 +1,120 @@ + + + + + diff --git a/frontend/src/views/Articles.vue b/frontend/src/views/Articles.vue new file mode 100644 index 0000000..8cc14e1 --- /dev/null +++ b/frontend/src/views/Articles.vue @@ -0,0 +1,216 @@ + + + + + diff --git a/frontend/src/views/Dashboard.vue b/frontend/src/views/Dashboard.vue new file mode 100644 index 0000000..ed5e2e8 --- /dev/null +++ b/frontend/src/views/Dashboard.vue @@ -0,0 +1,192 @@ + + + + + diff --git a/frontend/src/views/Feeds.vue b/frontend/src/views/Feeds.vue new file mode 100644 index 0000000..a625206 --- /dev/null +++ b/frontend/src/views/Feeds.vue @@ -0,0 +1,369 @@ + + + diff --git a/frontend/vite.config.js b/frontend/vite.config.js new file mode 100644 index 0000000..d8474ad --- /dev/null +++ b/frontend/vite.config.js @@ -0,0 +1,30 @@ +import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' +import { resolve } from 'path' + +export default defineConfig({ + plugins: [vue()], + base: '/', + build: { + outDir: 'dist', + assetsDir: 'assets', + }, + resolve: { + alias: { + '@': resolve(__dirname, 'src'), + }, + }, + server: { + port: 5173, + proxy: { + '/api': { + target: 'http://localhost:8000', + changeOrigin: true, + }, + '/api/v1': { + target: 'http://localhost:8000', + changeOrigin: true, + }, + }, + }, +})

{{ article.title || '无标题' }}

📄 文章列表

{{ article.title || '无标题' }}

📊 仪表盘

📡 RSS 源管理