feat: init rssKeeper - RSS 抓取、管理与检索系统

完整功能包括:
- FastAPI 后端 + SQLite + FTS5 全文搜索
- RSS 源管理、自动发现、OPML 导入导出
- 文章抓取、去重、分类、全文检索
- RSS 源健康度监控
- Vue 3 + Element Plus 暗色主题 Web UI
- 对外 REST API 供 AI 分析调用
- Docker + docker-compose 部署
This commit is contained in:
congsh
2026-06-11 14:03:36 +08:00
commit 54e7db0ef0
28 changed files with 2915 additions and 0 deletions
+133
View File
@@ -0,0 +1,133 @@
"""文章管理 API"""
from typing import Optional
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import desc
from database import get_db
from models import Article, Feed
from fulltext_search import search_articles
# Router for the article endpoints; every route below is registered under the "/articles" prefix.
router = APIRouter(prefix="/articles", tags=["articles"])
class ArticleOut(BaseModel):
    """Schema describing one article entry in list responses.

    NOTE(review): no route visible in this module passes this model as a
    ``response_model``; the handlers build plain dicts with the same keys.
    """

    id: int
    feed_id: int
    title: str
    link: str
    author: str
    # Publication time as a string, or None when the article has none.
    published_at: Optional[str]
    summary: str
    is_read: bool
    created_at: str
    # Title and category of the feed the article belongs to.
    feed_title: str
    category: str

    class Config:
        # Lets the model be populated from attribute-bearing objects
        # instead of dicts only.
        from_attributes = True
def _parse_iso_datetime(value: str, field: str):
    """Convert an ISO-8601 query value into a naive ``datetime``.

    Args:
        value: Raw query-string value, e.g. ``2024-01-31`` or
            ``2024-01-31T08:00:00+08:00``.
        field: Name of the query parameter, used in the error message.

    Returns:
        A naive ``datetime``; offset-aware inputs are converted to UTC first.

    Raises:
        HTTPException: 400 when ``value`` is not valid ISO 8601.
    """
    from datetime import datetime, timezone

    text = value.strip()
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
    if text[-1:] in ("Z", "z"):
        text = text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail=f"参数 {field} 不是有效的 ISO 8601 时间",
        ) from exc
    if parsed.tzinfo is not None:
        # NOTE(review): assumes stored timestamps are naive UTC — confirm
        # against the code that writes Article.published_at.
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed


@router.get("")
def list_articles(
    skip: int = 0,
    limit: int = 50,
    feed_id: Optional[int] = None,
    category: Optional[str] = None,
    search: Optional[str] = None,
    since: Optional[str] = None,
    until: Optional[str] = None,
    is_read: Optional[bool] = None,
    db: Session = Depends(get_db),
):
    """List articles with pagination and optional filters.

    Args:
        skip: Number of rows to skip.
        limit: Maximum number of rows to return.
        feed_id: Restrict to one feed.
        category: Restrict to feeds of this category.
        search: Full-text keyword. When non-blank, the request is answered
            by ``search_articles`` alone and the other filters are ignored.
        since: Lower bound (inclusive) on ``published_at``, ISO 8601.
        until: Upper bound (inclusive) on ``published_at``, ISO 8601.
        is_read: Restrict to read (True) or unread (False) articles.
        db: Database session.

    Returns:
        ``{"total": <matching row count>, "items": [<article dict>, ...]}``.

    Raises:
        HTTPException: 400 when ``since`` or ``until`` cannot be parsed.
    """
    # A non-blank keyword switches to the full-text search backend.
    keyword = search.strip() if search else ""
    if keyword:
        results, total = search_articles(keyword, limit=limit, offset=skip)
        return {"total": total, "items": results}

    # Parse the bounds up front: SQLAlchemy's SQLite DateTime type rejects
    # plain strings as bind values, so the raw query strings cannot be
    # compared against the column directly.
    since_dt = _parse_iso_datetime(since, "since") if since else None
    until_dt = _parse_iso_datetime(until, "until") if until else None

    query = db.query(
        Article,
        Feed.title.label("feed_title"),
        Feed.category.label("category"),
    ).join(Feed)
    if feed_id:
        query = query.filter(Article.feed_id == feed_id)
    if category:
        query = query.filter(Feed.category == category)
    if is_read is not None:
        query = query.filter(Article.is_read == is_read)
    if since_dt is not None:
        query = query.filter(Article.published_at >= since_dt)
    if until_dt is not None:
        query = query.filter(Article.published_at <= until_dt)

    # Count before pagination so "total" reflects every matching row.
    total = query.count()
    rows = query.order_by(desc(Article.published_at)).offset(skip).limit(limit).all()

    items = [
        {
            "id": article.id,
            "feed_id": article.feed_id,
            "title": article.title or "",
            "link": article.link,
            "author": article.author or "",
            "published_at": article.published_at.isoformat() if article.published_at else None,
            "summary": article.summary or "",
            "is_read": article.is_read,
            "created_at": article.created_at.isoformat(),
            "feed_title": feed_title or "",
            "category": feed_category or "",
        }
        for article, feed_title, feed_category in rows
    ]
    return {"total": total, "items": items}
@router.get("/{article_id}")
def get_article(article_id: int, db: Session = Depends(get_db)):
    """Return a single article, including its content and feed metadata.

    Raises:
        HTTPException: 404 when no article has the given id.
    """
    record = db.query(Article).filter(Article.id == article_id).first()
    if not record:
        raise HTTPException(status_code=404, detail="文章不存在")

    # The owning feed is looked up separately; a missing feed yields blanks.
    owner = db.query(Feed).filter(Feed.id == record.feed_id).first()
    if owner:
        feed_title, feed_category = owner.title, owner.category
    else:
        feed_title, feed_category = "", ""

    published = record.published_at
    payload = {
        "id": record.id,
        "feed_id": record.feed_id,
        "title": record.title or "",
        "link": record.link,
        "author": record.author or "",
        "published_at": published.isoformat() if published else None,
        "content": record.content or "",
        "summary": record.summary or "",
        "is_read": record.is_read,
        "created_at": record.created_at.isoformat(),
        "feed_title": feed_title,
        "category": feed_category,
    }
    return payload
@router.put("/{article_id}/read")
def mark_read(article_id: int, db: Session = Depends(get_db)):
    """Flag one article as read and persist the change.

    Raises:
        HTTPException: 404 when no article has the given id.
    """
    target = db.query(Article).filter(Article.id == article_id).first()
    if target:
        target.is_read = True
        db.commit()
        return {"message": "已标记为已读"}
    raise HTTPException(status_code=404, detail="文章不存在")
@router.get("/search/fulltext")
def fulltext_search(
    q: str,
    skip: int = 0,
    limit: int = 50,
):
    """Run a full-text search over articles.

    Args:
        q: Search keyword; surrounding whitespace is ignored.
        skip: Number of hits to skip.
        limit: Maximum number of hits to return.

    Returns:
        ``{"total": <hit count>, "items": [...]}``; a blank query yields an
        empty result.
    """
    # Mirror list_articles, which strips the keyword and never hands a blank
    # query to the search backend.
    # NOTE(review): search_articles is not visible here; presumably a blank
    # query is not meaningful to it — confirm.
    keyword = q.strip()
    if not keyword:
        return {"total": 0, "items": []}
    results, total = search_articles(keyword, limit=limit, offset=skip)
    return {"total": total, "items": results}
from fastapi import HTTPException
+58
View File
@@ -0,0 +1,58 @@
"""仪表盘统计 API"""
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from database import get_db
from health_checker import get_overall_stats, get_feed_health
# Router for the dashboard endpoints; every route below is registered under the "/dashboard" prefix.
router = APIRouter(prefix="/dashboard", tags=["dashboard"])
@router.get("/stats")
def dashboard_stats(db: Session = Depends(get_db)):
    """Return the overall statistics computed by ``get_overall_stats``."""
    stats = get_overall_stats(db)
    return stats
@router.get("/health")
def dashboard_health(
    skip: int = 0,
    limit: int = 100,
    db: Session = Depends(get_db),
):
    """Return one page of per-feed health entries, problem feeds first.

    Returns:
        ``{"total": <number of entries>, "items": [<health dict>, ...]}``.
    """
    # Lower rank sorts first; unrecognised statuses rank like "unknown".
    rank = {"unhealthy": 0, "warning": 1, "unknown": 2, "healthy": 3}

    def severity(entry):
        return rank.get(entry["health_status"], 2)

    entries = get_feed_health(db)
    entries.sort(key=severity)
    page = entries[skip:skip + limit]
    return {"total": len(entries), "items": page}
@router.get("/recent-activity")
def recent_activity(limit: int = 20, db: Session = Depends(get_db)):
    """Return the most recent fetch-log entries, newest first.

    Returns:
        ``{"items": [...]}`` with one dict per log entry, each carrying the
        title of the feed it belongs to.
    """
    from sqlalchemy import desc
    from models import FetchLog, Feed

    newest_first = desc(FetchLog.created_at)
    rows = (
        db.query(FetchLog, Feed.title.label("feed_title"))
        .join(Feed)
        .order_by(newest_first)
        .limit(limit)
        .all()
    )

    items = []
    for entry, title in rows:
        items.append({
            "id": entry.id,
            "feed_id": entry.feed_id,
            "feed_title": title or "",
            "status": entry.status,
            "articles_fetched": entry.articles_fetched,
            "response_time_ms": entry.response_time_ms,
            "error_message": entry.error_message,
            "created_at": entry.created_at.isoformat(),
        })
    return {"items": items}
+163
View File
@@ -0,0 +1,163 @@
"""对外 API(供 AI/外部系统调用)"""
from typing import Optional
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from sqlalchemy import desc
from database import get_db
from models import Article, Feed
# Router for the externally consumed endpoints; every route below is registered under the "/external" prefix.
router = APIRouter(prefix="/external", tags=["external"])
@router.get("/recent")
def get_recent_articles(
    hours: int = 24,
    limit: int = 50,
    feed_id: Optional[int] = None,
    category: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """Return the articles stored within the last ``hours`` hours.

    This is the main endpoint exposed for AI analysis.

    Returns:
        A dict echoing the query parameters, the number of articles, and
        the articles themselves ordered by publication time, newest first.
    """
    cutoff = datetime.utcnow() - timedelta(hours=hours)

    selection = db.query(
        Article,
        Feed.title.label("feed_title"),
        Feed.category.label("category"),
    ).join(Feed)
    selection = selection.filter(Article.created_at >= cutoff)
    if feed_id:
        selection = selection.filter(Article.feed_id == feed_id)
    if category:
        selection = selection.filter(Feed.category == category)
    rows = selection.order_by(desc(Article.published_at)).limit(limit).all()

    articles = []
    for entry, source_title, source_category in rows:
        # Bodies of 10000 characters or more are replaced by the summary.
        body = entry.content or ""
        if len(body) >= 10000:
            body = entry.summary or ""
        published = entry.published_at
        articles.append({
            "id": entry.id,
            "title": entry.title or "",
            "link": entry.link,
            "author": entry.author or "",
            "summary": entry.summary or "",
            "content": body,
            "published_at": published.isoformat() if published else None,
            "created_at": entry.created_at.isoformat(),
            "feed_title": source_title or "",
            "category": source_category or "",
        })

    echoed = {
        "hours": hours,
        "limit": limit,
        "feed_id": feed_id,
        "category": category,
    }
    return {"query": echoed, "count": len(rows), "articles": articles}
@router.get("/feeds")
def get_active_feeds(db: Session = Depends(get_db)):
    """Return every feed whose ``is_active`` flag is set.

    Returns:
        ``{"count": <number of feeds>, "feeds": [<feed dict>, ...]}``.
    """
    # "== True" is intentional: it builds a SQL expression, not a Python bool.
    active = db.query(Feed).filter(Feed.is_active == True).all()  # noqa: E712

    listing = []
    for source in active:
        fetched = source.last_fetch_at
        listing.append({
            "id": source.id,
            # Feeds without a title are shown by their URL.
            "title": source.title or source.url,
            "url": source.url,
            "category": source.category or "",
            "article_count": source.article_count,
            "last_fetch_at": fetched.isoformat() if fetched else None,
        })
    return {"count": len(active), "feeds": listing}
@router.get("/feeds/{feed_id}/articles")
def get_feed_articles(
    feed_id: int,
    limit: int = 100,
    since: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """Return the articles of one feed, newest publication first.

    Args:
        feed_id: Id of the feed.
        limit: Maximum number of articles to return.
        since: Optional lower bound (inclusive) on ``published_at``,
            ISO 8601 (e.g. ``2024-01-31`` or ``2024-01-31T08:00:00``).
        db: Database session.

    Returns:
        A dict with the feed's id/title/url, the article count and the
        articles; or ``{"error": ...}`` when the feed does not exist or
        ``since`` cannot be parsed.
    """
    from datetime import timezone

    feed = db.query(Feed).filter(Feed.id == feed_id).first()
    if not feed:
        return {"error": "Feed not found"}

    query = db.query(Article).filter(Article.feed_id == feed_id)
    if since:
        # SQLAlchemy's SQLite DateTime type rejects plain strings as bind
        # values, so the bound must be a datetime before it reaches the query.
        text = since.strip()
        # datetime.fromisoformat() only accepts a trailing "Z" from 3.11 on.
        if text[-1:] in ("Z", "z"):
            text = text[:-1] + "+00:00"
        try:
            since_dt = datetime.fromisoformat(text)
        except ValueError:
            return {"error": "Invalid since format, use ISO 8601 (e.g. 2024-01-31 or 2024-01-31T08:00:00)"}
        if since_dt.tzinfo is not None:
            # NOTE(review): assumes stored timestamps are naive UTC — confirm.
            since_dt = since_dt.astimezone(timezone.utc).replace(tzinfo=None)
        query = query.filter(Article.published_at >= since_dt)

    articles = query.order_by(desc(Article.published_at)).limit(limit).all()
    return {
        "feed": {
            "id": feed.id,
            "title": feed.title or feed.url,
            "url": feed.url,
        },
        "count": len(articles),
        "articles": [
            {
                "id": article.id,
                "title": article.title or "",
                "link": article.link,
                "author": article.author or "",
                "summary": article.summary or "",
                "published_at": article.published_at.isoformat() if article.published_at else None,
            }
            for article in articles
        ],
    }
@router.get("/summary")
def get_daily_summary(
    date: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """Summarize the articles stored on one day, grouped by feed category.

    Meant to give an AI consumer a quick overview of a day's RSS content.

    Args:
        date: Day to summarize as ``YYYY-MM-DD``; the current UTC day when
            omitted.
        db: Database session.

    Returns:
        A dict with the date, the total article count and the articles per
        category; or ``{"error": ...}`` when ``date`` is malformed.
    """
    if date:
        try:
            day = datetime.strptime(date, "%Y-%m-%d")
        except ValueError:
            return {"error": "Invalid date format, use YYYY-MM-DD"}
    else:
        day = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    next_day = day + timedelta(days=1)

    # Half-open window [day, next_day) on the time the article was stored.
    rows = (
        db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category"))
        .join(Feed)
        .filter(Article.created_at >= day, Article.created_at < next_day)
        .order_by(desc(Article.published_at))
        .all()
    )

    # Bucket the articles by category.
    by_category = {}
    for entry, source_title, source_category in rows:
        bucket = by_category.setdefault(source_category or "未分类", [])
        bucket.append({
            "title": entry.title or "",
            "link": entry.link,
            "feed": source_title or "",
            "summary": entry.summary or "",
        })

    return {
        "date": day.strftime("%Y-%m-%d"),
        "total_articles": len(rows),
        "by_category": by_category,
    }
+273
View File
@@ -0,0 +1,273 @@
"""RSS 源管理 API"""
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, HttpUrl
from sqlalchemy.orm import Session
from database import get_db
from models import Feed
from rss_fetcher import discover_feed_url, fetch_and_store_feed
from scheduler import add_feed_job, remove_feed_job
# Router for the feed-management endpoints; every route below is registered under the "/feeds" prefix.
router = APIRouter(prefix="/feeds", tags=["feeds"])
class FeedCreate(BaseModel):
    """Request body accepted when adding a new RSS feed."""

    # Feed URL; declared as a plain string, so no URL validation happens here.
    url: str
    title: Optional[str] = ""
    description: Optional[str] = ""
    category: Optional[str] = ""
    # Active feeds get a scheduled fetch job when they are created.
    is_active: Optional[bool] = True
    # Minutes between scheduled fetches.
    fetch_interval_minutes: Optional[int] = 60
class FeedUpdate(BaseModel):
    """Request body for a partial feed update.

    Every field defaults to None; the update handler only applies fields
    whose value is not None.
    """

    title: Optional[str] = None
    description: Optional[str] = None
    category: Optional[str] = None
    is_active: Optional[bool] = None
    # Minutes between scheduled fetches.
    fetch_interval_minutes: Optional[int] = None
class FeedOut(BaseModel):
    """Schema describing a feed as exposed by the API.

    NOTE(review): the routes visible in this module declare
    ``response_model=dict`` or none at all, and build plain dicts with
    these keys rather than returning this model.
    """

    id: int
    url: str
    title: str
    description: str
    category: str
    is_active: bool
    fetch_interval_minutes: int
    # Time of the last fetch as a string, or None when there is none.
    last_fetch_at: Optional[str] = None
    last_fetch_status: str
    success_count: int
    fail_count: int
    article_count: int
    health_status: str
    created_at: str

    class Config:
        # Lets the model be populated from attribute-bearing objects
        # instead of dicts only.
        from_attributes = True
@router.get("", response_model=dict)
def list_feeds(
    skip: int = 0,
    limit: int = 100,
    category: Optional[str] = None,
    search: Optional[str] = None,
    is_active: Optional[bool] = None,
    db: Session = Depends(get_db),
):
    """List RSS feeds with pagination, category filtering and text search.

    Args:
        skip: Number of rows to skip.
        limit: Maximum number of rows to return.
        category: Restrict to feeds of this category.
        search: Substring to look for in the title, URL or description.
        is_active: Restrict to active (True) or inactive (False) feeds.
        db: Database session.

    Returns:
        ``{"total": <matching row count>, "items": [<feed dict>, ...]}``,
        newest feeds first.
    """
    query = db.query(Feed)
    if category:
        query = query.filter(Feed.category == category)
    if is_active is not None:
        query = query.filter(Feed.is_active == is_active)
    if search:
        # autoescape=True makes "%" and "_" in the user's text match
        # literally instead of acting as LIKE wildcards (underscores are
        # common in URLs).
        query = query.filter(
            Feed.title.contains(search, autoescape=True)
            | Feed.url.contains(search, autoescape=True)
            | Feed.description.contains(search, autoescape=True)
        )

    # Count before pagination so "total" reflects every matching row.
    total = query.count()
    feeds = query.order_by(Feed.created_at.desc()).offset(skip).limit(limit).all()

    results = [
        {
            "id": feed.id,
            "url": feed.url,
            # Feeds without a title are shown by their URL.
            "title": feed.title or feed.url,
            "description": feed.description or "",
            "category": feed.category or "",
            "is_active": feed.is_active,
            "fetch_interval_minutes": feed.fetch_interval_minutes,
            "last_fetch_at": feed.last_fetch_at.isoformat() if feed.last_fetch_at else None,
            "last_fetch_status": feed.last_fetch_status,
            "success_count": feed.success_count,
            "fail_count": feed.fail_count,
            "article_count": feed.article_count,
            "health_status": feed.health_status(),
            "created_at": feed.created_at.isoformat(),
        }
        for feed in feeds
    ]
    return {"total": total, "items": results}
@router.get("/categories")
def list_categories(db: Session = Depends(get_db)):
    """Return the distinct non-empty categories assigned to feeds."""
    rows = db.query(Feed.category).filter(Feed.category != "").distinct().all()
    names = []
    for row in rows:
        name = row[0]
        # The SQL filter removes "", this check also drops NULL categories.
        if name:
            names.append(name)
    return names
@router.post("", response_model=dict)
def create_feed(data: FeedCreate, db: Session = Depends(get_db)):
    """Add a new RSS feed, schedule it when active, and fetch it once.

    Returns:
        A dict with the new feed's id, a confirmation message and its URL.

    Raises:
        HTTPException: 409 when a feed with the same URL already exists.
    """
    # Refuse duplicates by URL.
    duplicate = db.query(Feed).filter(Feed.url == data.url).first()
    if duplicate:
        raise HTTPException(status_code=409, detail="该 RSS 源已存在")

    attributes = {
        "url": data.url,
        "title": data.title or "",
        "description": data.description or "",
        "category": data.category or "",
        "is_active": data.is_active,
        "fetch_interval_minutes": data.fetch_interval_minutes or 60,
    }
    feed = Feed(**attributes)
    db.add(feed)
    db.commit()
    db.refresh(feed)

    # Only active feeds get a recurring job.
    if feed.is_active:
        add_feed_job(feed.id, feed.fetch_interval_minutes)

    # The first fetch runs right away, whatever the active flag says.
    fetch_and_store_feed(feed.id)

    return {"id": feed.id, "message": "RSS 源添加成功", "url": feed.url}
@router.post("/discover")
def discover_feed(url: str, db: Session = Depends(get_db)):
    """Look up the feed URLs that ``discover_feed_url`` finds for a page.

    Returns:
        ``{"source_url": <url as given>, "found_feeds": <discovery result>}``.
    """
    candidates = discover_feed_url(url)
    response = {"source_url": url, "found_feeds": candidates}
    return response
# The ":int" convertor limits this route to numeric ids. Without it the
# pattern also captures literal GET paths registered later on this router
# (such as "/export-opml"), which then fail integer validation with a 422
# and never reach their own handler.
@router.get("/{feed_id:int}", response_model=dict)
def get_feed(feed_id: int, db: Session = Depends(get_db)):
    """Return the details of one RSS feed, including its last error.

    Args:
        feed_id: Id of the feed.
        db: Database session.

    Returns:
        A dict describing the feed.

    Raises:
        HTTPException: 404 when no feed has the given id.
    """
    feed = db.query(Feed).filter(Feed.id == feed_id).first()
    if not feed:
        raise HTTPException(status_code=404, detail="RSS 源不存在")
    return {
        "id": feed.id,
        "url": feed.url,
        # Feeds without a title are shown by their URL.
        "title": feed.title or feed.url,
        "description": feed.description or "",
        "category": feed.category or "",
        "is_active": feed.is_active,
        "fetch_interval_minutes": feed.fetch_interval_minutes,
        "last_fetch_at": feed.last_fetch_at.isoformat() if feed.last_fetch_at else None,
        "last_fetch_status": feed.last_fetch_status,
        "last_error": feed.last_error,
        "success_count": feed.success_count,
        "fail_count": feed.fail_count,
        "article_count": feed.article_count,
        "health_status": feed.health_status(),
        "created_at": feed.created_at.isoformat(),
    }
@router.put("/{feed_id}", response_model=dict)
def update_feed(feed_id: int, data: FeedUpdate, db: Session = Depends(get_db)):
    """Apply a partial update to a feed and keep its fetch job in sync.

    Only fields that are not None in ``data`` are applied.

    Args:
        feed_id: Id of the feed to update.
        data: Fields to change.
        db: Database session.

    Returns:
        A confirmation message.

    Raises:
        HTTPException: 404 when no feed has the given id; 400 when
            ``fetch_interval_minutes`` is not positive.
    """
    feed = db.query(Feed).filter(Feed.id == feed_id).first()
    if not feed:
        raise HTTPException(status_code=404, detail="RSS 源不存在")

    interval = data.fetch_interval_minutes
    if interval is not None and interval <= 0:
        raise HTTPException(status_code=400, detail="抓取间隔必须大于 0 分钟")

    for field in ("title", "description", "category", "is_active", "fetch_interval_minutes"):
        value = getattr(data, field)
        if value is not None:
            setattr(feed, field, value)

    # Read the scheduling inputs before commit() expires the instance.
    job_feed_id = feed.id
    now_active = feed.is_active
    now_interval = feed.fetch_interval_minutes

    # Persist first so a failed commit cannot leave the scheduler ahead of
    # the database.
    db.commit()

    # Touch the scheduler once, with the final interval, and only when a
    # scheduling-related field was part of the request.
    if now_active:
        if data.is_active is not None or interval is not None:
            add_feed_job(job_feed_id, now_interval)
    elif data.is_active is not None:
        remove_feed_job(job_feed_id)

    return {"message": "RSS 源更新成功"}
@router.delete("/{feed_id}")
def delete_feed(feed_id: int, db: Session = Depends(get_db)):
    """Delete a feed after removing its scheduled fetch job.

    NOTE(review): the original docstring says articles and logs are removed
    by cascade; that depends on the model definitions, not visible here.

    Raises:
        HTTPException: 404 when no feed has the given id.
    """
    doomed = db.query(Feed).filter(Feed.id == feed_id).first()
    if doomed:
        # Unschedule first, then drop the row.
        remove_feed_job(feed_id)
        db.delete(doomed)
        db.commit()
        return {"message": "RSS 源已删除"}
    raise HTTPException(status_code=404, detail="RSS 源不存在")
@router.post("/{feed_id}/fetch")
def trigger_fetch(feed_id: int, db: Session = Depends(get_db)):
    """Fetch one feed immediately and return the fetcher's result.

    Raises:
        HTTPException: 404 when no feed has the given id.
    """
    known = db.query(Feed).filter(Feed.id == feed_id).first()
    if known:
        return fetch_and_store_feed(feed_id)
    raise HTTPException(status_code=404, detail="RSS 源不存在")
@router.post("/import-opml")
def import_opml(opml_content: str, db: Session = Depends(get_db)):
    """Import feeds from OPML text, skipping URLs that already exist.

    Every ``<outline>`` element carrying an ``xmlUrl`` (or ``xmlurl``)
    attribute becomes an active feed with a 60-minute fetch interval and a
    scheduled fetch job.

    Args:
        opml_content: The OPML document as text.
        db: Database session.

    Returns:
        A dict with the number of feeds added and skipped, plus a message.

    Raises:
        HTTPException: 400 when ``opml_content`` is not well-formed XML.
    """
    import xml.etree.ElementTree as ET

    # NOTE(security): opml_content is client-supplied and the stdlib parser
    # does not guard against entity-expansion attacks; consider a hardened
    # parser if this endpoint is exposed to untrusted clients.
    try:
        root = ET.fromstring(opml_content)
    except ET.ParseError as exc:
        # Chain the cause so the parse position is kept in tracebacks.
        raise HTTPException(status_code=400, detail="无效的 OPML 文件") from exc

    added = 0
    skipped = 0
    for outline in root.iter("outline"):
        # Outlines without a feed URL are ignored; stray whitespace around
        # the URL would otherwise defeat the duplicate check.
        url = (outline.get("xmlUrl") or outline.get("xmlurl") or "").strip()
        if not url:
            continue

        existing = db.query(Feed).filter(Feed.url == url).first()
        if existing:
            skipped += 1
            continue

        feed = Feed(
            url=url,
            title=outline.get("title", "") or outline.get("text", ""),
            description=outline.get("description", ""),
            category=outline.get("category", ""),
            is_active=True,
            fetch_interval_minutes=60,
        )
        db.add(feed)
        # Committing per feed lets the duplicate check above also catch a
        # URL repeated later in the same document.
        db.commit()
        db.refresh(feed)
        add_feed_job(feed.id, feed.fetch_interval_minutes)
        added += 1

    return {"added": added, "skipped": skipped, "message": f"成功导入 {added} 个 RSS 源"}
@router.get("/export-opml")
def export_opml(db: Session = Depends(get_db)):
    """Export every feed as an OPML 2.0 document.

    Args:
        db: Database session.

    Returns:
        ``{"opml": <OPML document as a string>}`` with one ``<outline>``
        element per feed.
    """
    from xml.sax.saxutils import escape

    def attr(value):
        # escape() covers "&", "<" and ">"; the extra entity covers the
        # double quote that delimits the attribute. Feed URLs routinely
        # contain "&", which would otherwise make the document malformed.
        return escape(value, {'"': "&quot;"})

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<opml version="2.0">',
        '<head><title>rssKeeper Feeds</title></head>',
        '<body>',
    ]
    for feed in db.query(Feed).all():
        # Feeds without a title are labelled with their URL.
        title = attr(feed.title or feed.url)
        lines.append(f' <outline type="rss" text="{title}" xmlUrl="{attr(feed.url)}" />')
    lines.append('</body>')
    lines.append('</opml>')
    return {"opml": "\n".join(lines)}