feat: 代理支持、外部API增强、调度器修复、每日文章看板

- 添加 HTTP 代理支持(国内直连、外网走代理)
- 外部 API 新增全文搜索、源健康度/错误筛选、未读筛选
- 修复 APScheduler 线程静默崩溃(_safe_fetch 异常保护)
- 健康检查暴露调度器状态
- Dashboard 新增每日文章数柱状图(按 published_at)
- 文章列表 API 补上 content 字段,日期筛选修复时间范围
- 修复外部 API 双重 external 前缀
- User-Agent 改为 Chrome 标识缓解 403
- 添加完整 API 接口文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
congsh
2026-06-12 09:58:32 +08:00
parent 68bba3d9e0
commit 4286731348
12 changed files with 1057 additions and 44 deletions
+4
View File
@@ -6,3 +6,7 @@ DEFAULT_FETCH_INTERVAL=60
MIN_FETCH_INTERVAL=15 MIN_FETCH_INTERVAL=15
MAX_ARTICLE_CONTENT_LENGTH=50000 MAX_ARTICLE_CONTENT_LENGTH=50000
MAX_SUMMARY_LENGTH=500 MAX_SUMMARY_LENGTH=500
# 代理配置(mihomo mixed-port,国内域名自动直连)
HTTP_PROXY=http://192.168.80.1:17890
HTTPS_PROXY=http://192.168.80.1:17890
+4
View File
@@ -16,6 +16,10 @@ FETCH_TIMEOUT = int(os.getenv("FETCH_TIMEOUT", "30"))
DEFAULT_FETCH_INTERVAL = int(os.getenv("DEFAULT_FETCH_INTERVAL", "60")) # 分钟 DEFAULT_FETCH_INTERVAL = int(os.getenv("DEFAULT_FETCH_INTERVAL", "60")) # 分钟
MIN_FETCH_INTERVAL = int(os.getenv("MIN_FETCH_INTERVAL", "15")) # 最小间隔15分钟 MIN_FETCH_INTERVAL = int(os.getenv("MIN_FETCH_INTERVAL", "15")) # 最小间隔15分钟
# 代理配置(用于访问外网源)
HTTP_PROXY = os.getenv("HTTP_PROXY", "")
HTTPS_PROXY = os.getenv("HTTPS_PROXY", "")
# 内容处理 # 内容处理
MAX_ARTICLE_CONTENT_LENGTH = int(os.getenv("MAX_ARTICLE_CONTENT_LENGTH", "50000")) MAX_ARTICLE_CONTENT_LENGTH = int(os.getenv("MAX_ARTICLE_CONTENT_LENGTH", "50000"))
MAX_SUMMARY_LENGTH = int(os.getenv("MAX_SUMMARY_LENGTH", "500")) MAX_SUMMARY_LENGTH = int(os.getenv("MAX_SUMMARY_LENGTH", "500"))
+3 -2
View File
@@ -5,7 +5,7 @@ from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from starlette.middleware.cors import CORSMiddleware from starlette.middleware.cors import CORSMiddleware
from database import init_db, SessionLocal from database import init_db, SessionLocal
from scheduler import init_feed_jobs, stop_scheduler from scheduler import init_feed_jobs, stop_scheduler, scheduler_status
from routers import feeds, articles, dashboard, external_api from routers import feeds, articles, dashboard, external_api
import config import config
@@ -57,7 +57,8 @@ app.include_router(external_api.router, prefix=config.EXTERNAL_API_PREFIX)
@app.get("/api/health") @app.get("/api/health")
def health_check(): def health_check():
"""健康检查""" """健康检查"""
return {"status": "ok", "service": "rssKeeper"} sched = scheduler_status()
return {"status": "ok", "service": "rssKeeper", "scheduler": sched}
# 静态文件服务(前端构建产物)— 必须放在最后,API 路由优先匹配 # 静态文件服务(前端构建产物)— 必须放在最后,API 路由优先匹配
+1
View File
@@ -72,6 +72,7 @@ def list_articles(
"link": article.link, "link": article.link,
"author": article.author or "", "author": article.author or "",
"published_at": article.published_at.isoformat() if article.published_at else None, "published_at": article.published_at.isoformat() if article.published_at else None,
"content": article.content or "",
"summary": article.summary or "", "summary": article.summary or "",
"is_read": article.is_read, "is_read": article.is_read,
"created_at": article.created_at.isoformat(), "created_at": article.created_at.isoformat(),
+20
View File
@@ -1,6 +1,7 @@
"""仪表盘统计 API""" """仪表盘统计 API"""
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy import func, text
from database import get_db from database import get_db
from health_checker import get_overall_stats, get_feed_health from health_checker import get_overall_stats, get_feed_health
@@ -31,6 +32,25 @@ def dashboard_health(
return {"total": total, "items": items} return {"total": total, "items": items}
@router.get("/articles-daily")
def articles_daily(days: int = 30, db: Session = Depends(get_db)):
    """Count articles per publication day over a recent window.

    Args:
        days: Size of the look-back window in days, counted back from the
            database's current date.
        db: Database session injected by FastAPI.

    Returns:
        A dict ``{"days": days, "data": [...]}`` where each item is
        ``{"date": <str>, "count": <int>}``, ordered newest day first.
    """
    # Raw SQL is used so grouping happens on DATE(published_at); articles
    # without a publication time are excluded. The window size is passed as
    # a bound parameter rather than being formatted into the statement.
    # NOTE(review): DATE('now', ...) is SQLite syntax — confirm the backing
    # database before porting.
    sql = text("""
        SELECT DATE(published_at) as date, COUNT(*) as count
        FROM articles
        WHERE published_at IS NOT NULL
          AND published_at >= DATE('now', '-' || :days || ' days')
        GROUP BY DATE(published_at)
        ORDER BY date DESC
    """)
    rows = db.execute(sql, {"days": days}).fetchall()
    return {
        "days": days,
        "data": [{"date": str(r[0]), "count": r[1]} for r in rows],
    }
@router.get("/recent-activity") @router.get("/recent-activity")
def recent_activity(limit: int = 20, db: Session = Depends(get_db)): def recent_activity(limit: int = 20, db: Session = Depends(get_db)):
"""最近的抓取活动""" """最近的抓取活动"""
+113 -28
View File
@@ -1,13 +1,14 @@
"""对外 API(供 AI/外部系统调用)""" """对外 API(供 AI/外部系统调用)"""
from typing import Optional from typing import Optional, List
from datetime import datetime, timedelta from datetime import datetime, timedelta
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends, Query
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy import desc from sqlalchemy import desc, or_
from database import get_db from database import get_db
from models import Article, Feed from models import Article, Feed
from fulltext_search import search_articles
router = APIRouter(prefix="/external", tags=["external"]) router = APIRouter(tags=["external"])
@router.get("/recent") @router.get("/recent")
@@ -16,21 +17,28 @@ def get_recent_articles(
limit: int = 50, limit: int = 50,
feed_id: Optional[int] = None, feed_id: Optional[int] = None,
category: Optional[str] = None, category: Optional[str] = None,
search: Optional[str] = None,
unread_only: bool = False,
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
"""获取最近 N 小时的文章 """获取最近 N 小时的文章
这是对外提供给 AI 分析的主要接口 AI 分析的主要接口,支持多条件组合筛选
""" """
since = datetime.utcnow() - timedelta(hours=hours) since = datetime.utcnow() - timedelta(hours=hours)
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed) query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
query = query.filter(Article.created_at >= since) query = query.filter(Article.created_at >= since)
if feed_id: if feed_id:
query = query.filter(Article.feed_id == feed_id) query = query.filter(Article.feed_id == feed_id)
if category: if category:
query = query.filter(Feed.category == category) query = query.filter(Feed.category == category)
if search:
query = query.filter(
Article.title.contains(search) | Article.summary.contains(search)
)
if unread_only:
query = query.filter(Article.is_read == False)
rows = query.order_by(desc(Article.published_at)).limit(limit).all() rows = query.order_by(desc(Article.published_at)).limit(limit).all()
@@ -40,6 +48,8 @@ def get_recent_articles(
"limit": limit, "limit": limit,
"feed_id": feed_id, "feed_id": feed_id,
"category": category, "category": category,
"search": search,
"unread_only": unread_only,
}, },
"count": len(rows), "count": len(rows),
"articles": [ "articles": [
@@ -60,24 +70,86 @@ def get_recent_articles(
} }
@router.get("/feeds") @router.get("/search")
def get_active_feeds(db: Session = Depends(get_db)): def fulltext_search(
"""获取所有活跃的 RSS 源列表""" q: str = Query(..., description="搜索关键词"),
feeds = db.query(Feed).filter(Feed.is_active == True).all() limit: int = Query(50, ge=1, le=200),
offset: int = Query(0, ge=0),
category: Optional[str] = Query(None, description="按分类筛选"),
feed_id: Optional[int] = Query(None, description="按源筛选"),
db: Session = Depends(get_db),
):
"""全文搜索文章(FTS5
供 AI 按关键词检索文章内容
"""
results, total = search_articles(q, limit, offset)
# 二次过滤分类和源
if category or feed_id:
filtered = []
for r in results:
if category and r["category"] != category:
continue
if feed_id and r["feed_id"] != feed_id:
continue
filtered.append(r)
results = filtered
total = len(filtered)
return { return {
"count": len(feeds), "query": q,
"feeds": [ "total": total,
{ "offset": offset,
"id": feed.id, "limit": limit,
"title": feed.title or feed.url, "articles": results,
"url": feed.url, }
"category": feed.category or "",
"article_count": feed.article_count,
"last_fetch_at": feed.last_fetch_at.isoformat() if feed.last_fetch_at else None, @router.get("/feeds")
} def get_active_feeds(
for feed in feeds health_status: Optional[str] = Query(None, description="按健康度筛选: healthy/warning/unhealthy/unknown"),
], category: Optional[str] = Query(None, description="按分类筛选"),
error_type: Optional[str] = Query(None, description="按错误类型筛选"),
is_active: Optional[bool] = Query(None, description="按启用状态筛选"),
db: Session = Depends(get_db),
):
"""获取 RSS 源列表(支持多条件筛选)"""
query = db.query(Feed)
if is_active is not None:
query = query.filter(Feed.is_active == is_active)
else:
query = query.filter(Feed.is_active == True)
if category:
query = query.filter(Feed.category == category)
feeds = query.all()
results = []
for feed in feeds:
status = feed.health_status()
if health_status and status != health_status:
continue
if error_type and feed.error_type != error_type:
continue
results.append({
"id": feed.id,
"title": feed.title or feed.url,
"url": feed.url,
"category": feed.category or "",
"is_active": feed.is_active,
"health_status": status,
"error_type": feed.error_type,
"article_count": feed.article_count,
"last_fetch_at": feed.last_fetch_at.isoformat() if feed.last_fetch_at else None,
"last_error": feed.last_error or "",
})
return {
"count": len(results),
"feeds": results,
} }
@@ -86,6 +158,8 @@ def get_feed_articles(
feed_id: int, feed_id: int,
limit: int = 100, limit: int = 100,
since: Optional[str] = None, since: Optional[str] = None,
search: Optional[str] = None,
unread_only: bool = False,
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
"""获取指定 RSS 源的文章""" """获取指定 RSS 源的文章"""
@@ -97,6 +171,12 @@ def get_feed_articles(
if since: if since:
query = query.filter(Article.published_at >= since) query = query.filter(Article.published_at >= since)
if search:
query = query.filter(
Article.title.contains(search) | Article.summary.contains(search)
)
if unread_only:
query = query.filter(Article.is_read == False)
articles = query.order_by(desc(Article.published_at)).limit(limit).all() articles = query.order_by(desc(Article.published_at)).limit(limit).all()
@@ -124,6 +204,7 @@ def get_feed_articles(
@router.get("/summary") @router.get("/summary")
def get_daily_summary( def get_daily_summary(
date: Optional[str] = None, date: Optional[str] = None,
category: Optional[str] = None,
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
"""获取指定日期的文章摘要统计 """获取指定日期的文章摘要统计
@@ -141,15 +222,19 @@ def get_daily_summary(
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed) query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
query = query.filter(Article.created_at >= day, Article.created_at < next_day) query = query.filter(Article.created_at >= day, Article.created_at < next_day)
if category:
query = query.filter(Feed.category == category)
rows = query.order_by(desc(Article.published_at)).all() rows = query.order_by(desc(Article.published_at)).all()
# 按分类统计
by_category = {} by_category = {}
for article, feed_title, category in rows: for article, feed_title, cat in rows:
cat = category or "未分类" c = cat or "未分类"
if cat not in by_category: if category and c != category:
by_category[cat] = [] continue
by_category[cat].append({ if c not in by_category:
by_category[c] = []
by_category[c].append({
"title": article.title or "", "title": article.title or "",
"link": article.link, "link": article.link,
"feed": feed_title or "", "feed": feed_title or "",
+33 -4
View File
@@ -13,6 +13,35 @@ from models import Feed, Article, FetchLog
from database import SessionLocal from database import SessionLocal
import config import config
# Hosts that are fetched directly; every other host goes through the proxy.
# Three entry forms are recognised:
#   ".suffix"  - matches any host ending with that suffix (e.g. ".cn")
#   "prefix."  - matches any host starting with that prefix (e.g. "192.168.")
#   "name"     - matches the host itself and any of its sub-domains
CN_DOMAINS = (
    ".cn", ".com.cn", ".org.cn", ".net.cn",
    "36kr.com", "zhihu.com", "weibo.com", "douban.com", "bilibili.com",
    "tmtpost.com", "ifanr.com", "geekpark.net", "pingwest.com",
    "juejin.cn", "segmentfault.com", "cnblogs.com", "csdn.net",
    "qq.com", "163.com", "sohu.com", "sina.com.cn", "baidu.com",
    "taobao.com", "jd.com", "aliyun.com",
    "xinhuanet.com", "people.com.cn", "sciencenet.cn",
    "localhost", "127.0.0.1", "192.168.",
)


def _is_direct_host(host: str) -> bool:
    """Return True when *host* matches an entry of CN_DOMAINS."""
    host = host.lower()
    for entry in CN_DOMAINS:
        if entry.startswith("."):
            # Pure suffix entry such as ".cn".
            if host.endswith(entry):
                return True
        elif entry.endswith("."):
            # Address prefix entry such as "192.168."; a suffix test can
            # never match these, so compare from the start of the host.
            if host.startswith(entry):
                return True
        elif host == entry or host.endswith("." + entry):
            # Exact host or a sub-domain. Requiring the leading dot keeps
            # look-alikes such as "notqq.com" from matching "qq.com".
            return True
    return False


def _get_proxies(url: str) -> dict:
    """Build the ``proxies`` mapping to pass to requests for *url*.

    Args:
        url: The URL about to be fetched.

    Returns:
        ``{}`` when no HTTPS proxy is configured (requests keeps its default
        behaviour); ``{"http": None, "https": None}`` for hosts listed in
        CN_DOMAINS; otherwise a mapping pointing both schemes at the
        configured proxy.
    """
    if not config.HTTPS_PROXY:
        return {}
    from urllib.parse import urlparse
    host = urlparse(url).hostname or ""
    if _is_direct_host(host):
        # An empty dict would let requests fall back to the HTTP(S)_PROXY
        # environment variables, which is where the configured proxy comes
        # from; explicit None values switch the proxy off for this request.
        # NOTE(review): redirects may re-read the environment — confirm if
        # direct hosts redirect to foreign hosts.
        return {"http": None, "https": None}
    return {
        "http": config.HTTP_PROXY or config.HTTPS_PROXY,
        "https": config.HTTPS_PROXY,
    }
def classify_error(error: str) -> str: def classify_error(error: str) -> str:
"""根据错误信息分类错误类型""" """根据错误信息分类错误类型"""
@@ -54,10 +83,10 @@ def fetch_feed(url: str, timeout: int = config.FETCH_TIMEOUT) -> dict:
start_time = time.time() start_time = time.time()
try: try:
headers = { headers = {
"User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36",
"Accept": "application/rss+xml, application/atom+xml, application/xml, text/xml, */*", "Accept": "application/rss+xml, application/atom+xml, application/xml, text/xml, */*",
} }
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True) response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, proxies=_get_proxies(url))
response.raise_for_status() response.raise_for_status()
# 解析 RSS # 解析 RSS
@@ -87,9 +116,9 @@ def discover_feed_url(url: str, timeout: int = 15) -> list:
""" """
try: try:
headers = { headers = {
"User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36",
} }
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True) response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, proxies=_get_proxies(url))
response.raise_for_status() response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser") soup = BeautifulSoup(response.content, "html.parser")
+31 -7
View File
@@ -1,9 +1,12 @@
"""APScheduler 定时任务管理""" """APScheduler 定时任务管理"""
import logging
from apscheduler.schedulers.background import BackgroundScheduler from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger from apscheduler.triggers.interval import IntervalTrigger
from rss_fetcher import fetch_and_store_feed from rss_fetcher import fetch_and_store_feed
import config import config
logger = logging.getLogger(__name__)
_scheduler = None _scheduler = None
@@ -11,32 +14,42 @@ def get_scheduler():
"""获取或创建调度器实例""" """获取或创建调度器实例"""
global _scheduler global _scheduler
if _scheduler is None: if _scheduler is None:
_scheduler = BackgroundScheduler() _scheduler = BackgroundScheduler(
job_defaults={
"coalesce": True,
"max_instances": 1,
"misfire_grace_time": 300,
},
logger=logger,
)
return _scheduler return _scheduler
def _safe_fetch(feed_id: int):
    """Run one scheduled fetch for *feed_id* without letting errors escape.

    Args:
        feed_id: ID of the feed handed to ``fetch_and_store_feed``.

    Any exception raised by the fetch is logged and swallowed, so a failing
    feed never propagates an error out of its scheduled job.
    """
    try:
        fetch_and_store_feed(feed_id)
    except Exception as e:
        # Deliberately broad: this is the job boundary. logger.exception
        # records the traceback as well, which the plain message would lose.
        logger.exception("调度抓取失败 feed_id=%s: %s", feed_id, e)
def add_feed_job(feed_id: int, interval_minutes: int): def add_feed_job(feed_id: int, interval_minutes: int):
"""为指定 RSS 源添加定时抓取任务""" """为指定 RSS 源添加定时抓取任务"""
scheduler = get_scheduler() scheduler = get_scheduler()
job_id = f"fetch_feed_{feed_id}" job_id = f"fetch_feed_{feed_id}"
# 确保间隔不低于最小值
interval = max(interval_minutes, config.MIN_FETCH_INTERVAL) interval = max(interval_minutes, config.MIN_FETCH_INTERVAL)
# 如果任务已存在则更新
existing = scheduler.get_job(job_id) existing = scheduler.get_job(job_id)
if existing: if existing:
existing.reschedule(trigger=IntervalTrigger(minutes=interval)) existing.reschedule(trigger=IntervalTrigger(minutes=interval))
return return
scheduler.add_job( scheduler.add_job(
fetch_and_store_feed, _safe_fetch,
trigger=IntervalTrigger(minutes=interval), trigger=IntervalTrigger(minutes=interval),
id=job_id, id=job_id,
args=[feed_id], args=[feed_id],
replace_existing=True, replace_existing=True,
misfire_grace_time=300, # 5分钟容错
coalesce=True, # 合并错过的任务
) )
@@ -55,6 +68,7 @@ def start_scheduler():
scheduler = get_scheduler() scheduler = get_scheduler()
if not scheduler.running: if not scheduler.running:
scheduler.start() scheduler.start()
logger.info("调度器已启动")
def stop_scheduler(): def stop_scheduler():
@@ -65,6 +79,15 @@ def stop_scheduler():
_scheduler = None _scheduler = None
def scheduler_status():
    """Report scheduler state for the health-check endpoint.

    Returns:
        ``{"running": bool, "jobs": int}``; the job count is reported as 0
        whenever the scheduler is not running.
    """
    sched = get_scheduler()
    if sched.running:
        return {"running": True, "jobs": len(sched.get_jobs())}
    return {"running": False, "jobs": 0}
def get_feed_next_run(feed_id: int): def get_feed_next_run(feed_id: int):
"""获取指定 RSS 源的下一次抓取时间""" """获取指定 RSS 源的下一次抓取时间"""
scheduler = get_scheduler() scheduler = get_scheduler()
@@ -81,3 +104,4 @@ def init_feed_jobs(db):
for feed in feeds: for feed in feeds:
add_feed_job(feed.id, feed.fetch_interval_minutes or config.DEFAULT_FETCH_INTERVAL) add_feed_job(feed.id, feed.fetch_interval_minutes or config.DEFAULT_FETCH_INTERVAL)
start_scheduler() start_scheduler()
logger.info(f"已注册 {len(feeds)} 个定时抓取任务")
+750
View File
@@ -0,0 +1,750 @@
# RSSKeeper API 接口文档
Base URL: `http://<host>:7329`
所有接口返回 JSON 格式数据。
---
## 目录
- [健康检查](#健康检查)
- [RSS 源管理](#rss-源管理)
- [获取源列表](#获取源列表)
- [获取源详情](#获取源详情)
- [添加源](#添加源)
- [更新源](#更新源)
- [删除源](#删除源)
- [触发抓取](#触发抓取)
- [批量抓取](#批量抓取)
- [自动发现](#自动发现)
- [获取分类列表](#获取分类列表)
- [导入 OPML](#导入-opml)
- [导出 OPML](#导出-opml)
- [文章管理](#文章管理)
- [获取文章列表](#获取文章列表)
- [获取文章详情](#获取文章详情)
- [全文搜索](#全文搜索)
- [标记已读](#标记已读)
- [仪表盘](#仪表盘)
- [统计概览](#统计概览)
- [健康度详情](#健康度详情)
- [最近活动](#最近活动)
- [外部 API(供 AI 集成)](#外部-api)
- [获取最近文章](#获取最近文章)
- [全文搜索](#全文搜索-1)
- [获取源列表(含筛选)](#获取源列表含筛选)
- [获取指定源文章](#获取指定源文章)
- [获取每日摘要](#获取每日摘要)
---
## 通用字段说明
### 健康度 (health_status)
| 值 | 含义 |
|---|------|
| `healthy` | 健康:成功率 >= 90%,7天内有抓取 |
| `warning` | 警告:成功率 50%-90%,或超过3天未更新 |
| `unhealthy` | 异常:成功率 < 50%,或超过7天未更新 |
| `unknown` | 未知:尚未进行过任何抓取 |
### 错误类型 (error_type)
| 值 | 含义 |
|---|------|
| `url_invalid` | URL 已失效(404) |
| `forbidden` | 被站点拒绝(403) |
| `rate_limited` | 频率限制(429) |
| `timeout` | 连接超时 |
| `dns_failure` | DNS 解析失败 |
| `connection_refused` | 连接被拒绝 |
| `connection_reset` | 连接中断 |
| `ssl_error` | SSL/TLS 错误 |
| `unreachable` | 服务器不可达 |
| `url_malformed` | URL 格式错误 |
| `server_error` | 服务器错误(5xx) |
| `unknown` | 其他未知错误 |
---
## 健康检查
### `GET /api/health`
检查服务是否运行。
**响应:**
```json
{
"status": "ok",
"service": "rssKeeper",
"scheduler": { "running": true, "jobs": 383 }
}
```
---
## RSS 源管理
### 获取源列表
### `GET /api/feeds`
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `skip` | int | 否 | 0 | 跳过条数(分页偏移) |
| `limit` | int | 否 | 100 | 每页条数 |
| `category` | string | 否 | - | 按分类筛选 |
| `search` | string | 否 | - | 按名称/URL/描述搜索 |
| `is_active` | bool | 否 | - | 按启用状态筛选 |
| `health_status` | string | 否 | - | 按健康度筛选:`healthy`/`warning`/`unhealthy`/`unknown` |
**响应:**
```json
{
"total": 383,
"items": [
{
"id": 1,
"url": "https://example.com/feed.xml",
"title": "Example Feed",
"description": "Feed description",
"category": "科技",
"is_active": true,
"fetch_interval_minutes": 60,
"last_fetch_at": "2026-06-11T08:33:36.474905",
"last_fetch_status": "success",
"last_error": "",
"error_type": "",
"success_count": 5,
"fail_count": 0,
"article_count": 42,
"health_status": "healthy",
"next_fetch_time": "2026-06-11T09:33:36.000000+00:00",
"created_at": "2026-06-11T08:33:24.591074"
}
]
}
```
---
### 获取源详情
### `GET /api/feeds/{feed_id}`
**路径参数:**
| 参数 | 类型 | 说明 |
|------|------|------|
| `feed_id` | int | RSS 源 ID |
**响应:** 同列表中的单条 items 结构。
---
### 添加源
### `POST /api/feeds`
**请求体:**
```json
{
"url": "https://example.com/feed.xml",
"title": "Example",
"description": "",
"category": "科技",
"is_active": true,
"fetch_interval_minutes": 60
}
```
| 字段 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `url` | string | **是** | - | RSS 源地址 |
| `title` | string | 否 | "" | 源名称(留空则自动抓取) |
| `description` | string | 否 | "" | 描述 |
| `category` | string | 否 | "" | 分类 |
| `is_active` | bool | 否 | true | 是否启用 |
| `fetch_interval_minutes` | int | 否 | 60 | 抓取间隔(分钟),最小 15 |
**响应:**
```json
{
"id": 1,
"message": "RSS 源添加成功,正在后台抓取",
"url": "https://example.com/feed.xml"
}
```
添加成功后会自动在后台触发首次抓取。
**错误:** `409` — 该 RSS 源已存在
---
### 更新源
### `PUT /api/feeds/{feed_id}`
**请求体(只需传要修改的字段):**
```json
{
"title": "新名称",
"category": "新闻",
"is_active": false,
"fetch_interval_minutes": 120
}
```
**响应:**
```json
{
"message": "RSS 源更新成功"
}
```
---
### 删除源
### `DELETE /api/feeds/{feed_id}`
删除 RSS 源,**级联删除**关联的所有文章和抓取日志。
**响应:**
```json
{
"message": "RSS 源已删除"
}
```
---
### 触发抓取
### `POST /api/feeds/{feed_id}/fetch`
手动触发单个源的抓取。同步执行,返回抓取结果。
**响应:**
```json
{
"success": true,
"articles_count": 5,
"feed_title": "Example Feed"
}
```
---
### 批量抓取
### `POST /api/feeds/batch-fetch`
并发同步抓取多个源。适用于"全部抓取"等场景。
**请求体:**
```json
{
"feed_ids": [1, 2, 3, 4, 5]
}
```
**响应:**
```json
{
"message": "完成:4 个成功,1 个失败",
"total": 5,
"success": 4,
"fail": 1
}
```
---
### 自动发现
### `POST /api/feeds/discover`
从任意网页自动发现 RSS/Atom feed URL。
**参数:**
| 参数 | 类型 | 必填 | 说明 |
|------|------|------|------|
| `url` | string | **是** | 网页地址 |
**响应:**
```json
{
"source_url": "https://example.com",
"found_feeds": [
"https://example.com/feed.xml",
"https://example.com/rss"
]
}
```
---
### 获取分类列表
### `GET /api/feeds/categories`
返回所有已使用的分类。
**响应:**
```json
["科技", "新闻", "设计"]
```
---
### 导入 OPML
### `POST /api/feeds/import-opml`
从 OPML 内容批量导入 RSS 源。
**请求体:**
```json
{
"opml_content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><opml version=\"2.0\">..."
}
```
| 字段 | 类型 | 必填 | 说明 |
|------|------|------|------|
| `opml_content` | string | **是** | OPML 文件内容(最大 5MB) |
**响应:**
```json
{
"added": 15,
"skipped": 3,
"message": "成功导入 15 个 RSS 源"
}
```
---
### 导出 OPML
### `GET /api/feeds/export-opml`
导出所有 RSS 源为 OPML 格式。
**响应:**
```json
{
"opml": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><opml version=\"2.0\">..."
}
```
---
## 文章管理
### 获取文章列表
### `GET /api/articles`
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `skip` | int | 否 | 0 | 分页偏移 |
| `limit` | int | 否 | 50 | 每页条数 |
| `feed_id` | int | 否 | - | 按源筛选 |
| `category` | string | 否 | - | 按分类筛选 |
| `search` | string | 否 | - | 按标题/链接搜索 |
| `since` | string | 否 | - | 起始时间(ISO 格式) |
| `until` | string | 否 | - | 截止时间(ISO 格式) |
| `is_read` | bool | 否 | - | 按已读状态筛选 |
**响应:**
```json
{
"total": 120,
"items": [
{
"id": 1,
"feed_id": 1,
"title": "文章标题",
"link": "https://example.com/article",
"author": "作者",
"published_at": "2026-06-11T06:00:00",
"content": "文章正文内容...",
"summary": "文章摘要...",
"is_read": false,
"created_at": "2026-06-11T08:33:36.474905",
"feed_title": "Example Feed",
"category": "科技"
}
]
}
```
---
### 获取文章详情
### `GET /api/articles/{article_id}`
**响应:** 同列表中的单条 items 结构。
---
### 全文搜索
### `GET /api/articles/search/fulltext`
使用 SQLite FTS5 进行全文搜索。
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `q` | string | **是** | - | 搜索关键词 |
| `skip` | int | 否 | 0 | 分页偏移 |
| `limit` | int | 否 | 50 | 每页条数 |
**响应:** 同文章列表格式。
---
### 标记已读
### `PUT /api/articles/{article_id}/read`
**响应:**
```json
{
"message": "已标记为已读"
}
```
---
## 仪表盘
### 统计概览
### `GET /api/dashboard/stats`
**响应:**
```json
{
"total_feeds": 383,
"active_feeds": 383,
"total_articles": 1024,
"healthy_feeds": 202,
"warning_feeds": 0,
"unhealthy_feeds": 167,
"today_fetches": 45,
"today_success": 40,
"today_success_rate": 88.9
}
```
---
### 健康度详情
### `GET /api/dashboard/health`
获取每个 RSS 源的健康状态详情。
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `skip` | int | 否 | 0 | 分页偏移 |
| `limit` | int | 否 | 100 | 每页条数 |
**响应:**
```json
{
"total": 383,
"items": [
{
"id": 1,
"title": "Example Feed",
"url": "https://example.com/feed.xml",
"is_active": true,
"health_status": "healthy",
"health_label": "健康",
"success_rate": 100.0,
"success_count": 5,
"fail_count": 0,
"total_fetches": 5,
"last_fetch_at": "2026-06-11T08:33:36.474905",
"days_since_fetch": 0,
"article_count": 42,
"last_error": "",
"recent_logs": [
{
"status": "success",
"articles_fetched": 3,
"response_time_ms": 450,
"created_at": "2026-06-11T08:33:36.474905",
"error_message": null
}
]
}
]
}
```
---
### 最近活动
### `GET /api/dashboard/recent-activity`
获取最近的抓取活动日志。
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `limit` | int | 否 | 20 | 返回条数 |
**响应:**
```json
{
"items": [
{
"id": 1,
"feed_id": 1,
"feed_title": "Example Feed",
"status": "success",
"articles_fetched": 3,
"error_message": "",
"response_time_ms": 450,
"created_at": "2026-06-11T08:33:36.474905"
}
]
}
```
---
## 外部 API
供 AI 助手、外部系统调用的接口。前缀:`/api/v1/external`
### 获取最近文章
### `GET /api/v1/external/recent`
获取最近 N 小时的文章,支持多条件组合筛选。
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `hours` | int | 否 | 24 | 回溯小时数 |
| `limit` | int | 否 | 50 | 最大返回条数 |
| `feed_id` | int | 否 | - | 按源 ID 筛选 |
| `category` | string | 否 | - | 按分类筛选 |
| `search` | string | 否 | - | 按标题/摘要关键词筛选 |
| `unread_only` | bool | 否 | false | 只返回未读文章 |
**响应:**
```json
{
"query": { "hours": 24, "limit": 50, "feed_id": null, "category": null, "search": null, "unread_only": false },
"count": 15,
"articles": [
{
"id": 1,
"title": "文章标题",
"link": "https://example.com/article",
"author": "作者",
"summary": "摘要文本",
"content": "正文内容(超过10000字符时返回摘要)",
"published_at": "2026-06-11T06:00:00",
"created_at": "2026-06-11T08:33:36",
"feed_title": "Example Feed",
"category": "科技"
}
]
}
```
---
### 全文搜索
### `GET /api/v1/external/search`
使用 FTS5 全文搜索引擎检索文章内容。**供 AI 按关键词精准查找文章**。
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `q` | string | **是** | - | 搜索关键词 |
| `limit` | int | 否 | 50 | 最大返回条数(1-200) |
| `offset` | int | 否 | 0 | 分页偏移 |
| `category` | string | 否 | - | 按分类二次筛选 |
| `feed_id` | int | 否 | - | 按源 ID 二次筛选 |
**响应:**
```json
{
"query": "LLM",
"total": 12,
"offset": 0,
"limit": 50,
"articles": [
{
"id": 15674,
"title": "文章标题",
"summary": "匹配的摘要...",
"link": "https://example.com/article",
"published_at": "2026-06-11T06:00:00",
"created_at": "2026-06-11T08:33:36",
"feed_id": 1,
"feed_title": "Example Feed",
"category": "科技"
}
]
}
```
---
### 获取源列表(含筛选)
### `GET /api/v1/external/feeds`
获取 RSS 源列表,支持按健康度、错误类型、分类等多维度筛选。
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `health_status` | string | 否 | - | 按健康度筛选:`healthy`/`warning`/`unhealthy`/`unknown` |
| `category` | string | 否 | - | 按分类筛选 |
| `error_type` | string | 否 | - | 按错误类型筛选(见通用字段说明) |
| `is_active` | bool | 否 | true | 按启用状态筛选 |
**响应:**
```json
{
"count": 167,
"feeds": [
{
"id": 1,
"title": "Example Feed",
"url": "https://example.com/feed.xml",
"category": "科技",
"is_active": true,
"health_status": "unhealthy",
"error_type": "timeout",
"article_count": 42,
"last_fetch_at": "2026-06-11T08:33:36",
"last_error": "HTTPSConnectionPool..."
}
]
}
```
**示例:**
- 查看所有异常源:`/api/v1/external/feeds?health_status=unhealthy`
- 查看 URL 失效的源:`/api/v1/external/feeds?error_type=url_invalid`
- 查看指定分类:`/api/v1/external/feeds?category=科技`
---
### 获取指定源文章
### `GET /api/v1/external/feeds/{feed_id}/articles`
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `feed_id` | int | **是** | - | RSS 源 ID(路径参数) |
| `limit` | int | 否 | 100 | 最大返回条数 |
| `since` | string | 否 | - | 起始时间过滤 |
| `search` | string | 否 | - | 按标题/摘要关键词筛选 |
| `unread_only` | bool | 否 | false | 只返回未读文章 |
**响应:**
```json
{
"feed": { "id": 1, "title": "Example Feed", "url": "https://example.com/feed.xml" },
"count": 42,
"articles": [
{
"id": 1,
"title": "文章标题",
"link": "https://example.com/article",
"author": "作者",
"summary": "摘要",
"published_at": "2026-06-11T06:00:00"
}
]
}
```
---
### 获取每日摘要
### `GET /api/v1/external/summary`
获取指定日期的文章摘要,按分类分组。
**参数:**
| 参数 | 类型 | 必填 | 默认值 | 说明 |
|------|------|------|--------|------|
| `date` | string | 否 | 今天 | 日期,格式 `YYYY-MM-DD` |
| `category` | string | 否 | - | 按分类筛选 |
**响应:**
```json
{
"date": "2026-06-11",
"total_articles": 35,
"by_category": {
"科技": [
{ "title": "文章标题", "link": "https://...", "feed": "Feed 名称", "summary": "文章摘要..." }
],
"新闻": [...]
}
}
```
+2
View File
@@ -43,6 +43,7 @@ export const articlesApi = {
list: (params = {}) => api.get('/api/articles', { params }), list: (params = {}) => api.get('/api/articles', { params }),
get: (id) => api.get(`/api/articles/${id}`), get: (id) => api.get(`/api/articles/${id}`),
search: (q) => api.get('/api/articles/search/fulltext', { params: { q } }), search: (q) => api.get('/api/articles/search/fulltext', { params: { q } }),
searchFulltext: (params = {}) => api.get('/api/articles/search/fulltext', { params }),
markRead: (id) => api.put(`/api/articles/${id}/read`), markRead: (id) => api.put(`/api/articles/${id}/read`),
} }
@@ -51,6 +52,7 @@ export const dashboardApi = {
stats: () => api.get('/api/dashboard/stats'), stats: () => api.get('/api/dashboard/stats'),
health: (params = {}) => api.get('/api/dashboard/health', { params }), health: (params = {}) => api.get('/api/dashboard/health', { params }),
recentActivity: () => api.get('/api/dashboard/recent-activity'), recentActivity: () => api.get('/api/dashboard/recent-activity'),
articlesDaily: (params = {}) => api.get('/api/dashboard/articles-daily', { params }),
} }
// 对外 API // 对外 API
+6 -3
View File
@@ -118,13 +118,16 @@ const loadArticles = async () => {
if (filterFeed.value) params.feed_id = filterFeed.value if (filterFeed.value) params.feed_id = filterFeed.value
if (filterCategory.value) params.category = filterCategory.value if (filterCategory.value) params.category = filterCategory.value
if (dateRange.value && dateRange.value[0]) { if (dateRange.value && dateRange.value[0]) {
params.since = dateRange.value[0] params.since = dateRange.value[0] + 'T00:00:00'
params.until = dateRange.value[1] params.until = dateRange.value[1] + 'T23:59:59'
} }
// 如果有搜索词,使用全文搜索 // 如果有搜索词,使用全文搜索
if (searchQuery.value && searchQuery.value.trim()) { if (searchQuery.value && searchQuery.value.trim()) {
const res = await articlesApi.search(searchQuery.value.trim()) const searchParams = { q: searchQuery.value.trim(), limit: pageSize.value, offset: (page.value - 1) * pageSize.value }
if (filterFeed.value) searchParams.feed_id = filterFeed.value
if (filterCategory.value) searchParams.category = filterCategory.value
const res = await articlesApi.searchFulltext(searchParams)
articles.value = res.items || [] articles.value = res.items || []
stats.value = { total: res.total } stats.value = { total: res.total }
} else { } else {
+90
View File
@@ -82,6 +82,26 @@
</el-col> </el-col>
</el-row> </el-row>
<!-- 每日文章统计 -->
<el-row style="margin-top: 20px;">
<el-col :span="24">
<div class="dark-card">
<div class="dark-card-header">
<span>📈 每日文章数按发布日期</span>
</div>
<div class="daily-chart" v-loading="loadingDaily">
<div v-for="d in dailyData" :key="d.date" class="daily-bar-wrap">
<div class="daily-bar" :style="{ height: barHeight(d.count) + 'px' }">
<span class="daily-count">{{ d.count }}</span>
</div>
<div class="daily-date">{{ shortDate(d.date) }}</div>
</div>
<div v-if="!dailyData.length && !loadingDaily" class="empty-row">暂无数据</div>
</div>
</div>
</el-col>
</el-row>
<!-- 分类分布 --> <!-- 分类分布 -->
<el-row style="margin-top: 20px;"> <el-row style="margin-top: 20px;">
<el-col :span="24"> <el-col :span="24">
@@ -112,6 +132,8 @@ const recentActivity = ref([])
const loadingHealth = ref(false) const loadingHealth = ref(false)
const loadingActivity = ref(false) const loadingActivity = ref(false)
const categoryStats = ref([]) const categoryStats = ref([])
const dailyData = ref([])
const loadingDaily = ref(false)
const statsCards = computed(() => [ const statsCards = computed(() => [
{ key: 'feeds', label: 'RSS 源总数', value: stats.value.total_feeds || 0, color: '#63b3ed' }, { key: 'feeds', label: 'RSS 源总数', value: stats.value.total_feeds || 0, color: '#63b3ed' },
@@ -185,6 +207,29 @@ const loadActivity = async () => {
} }
} }
// Fetch per-day article counts for the last 14 days and store them oldest
// first (the API returns newest first), toggling the loading flag around
// the request.
const loadDaily = async () => {
  loadingDaily.value = true
  try {
    const { data } = await dashboardApi.articlesDaily({ days: 14 })
    const rows = data || []
    dailyData.value = rows.reverse()
  } catch (e) {
    console.error('加载每日统计失败', e)
  } finally {
    loadingDaily.value = false
  }
}
// Scale a day's count to a bar height in pixels: the largest count maps to
// 120px and no bar is drawn shorter than 4px.
const barHeight = (count) => {
  const counts = dailyData.value.map((d) => d.count)
  const peak = Math.max(1, ...counts)
  const scaled = Math.round((count / peak) * 120)
  return Math.max(4, scaled)
}
// Format a date as "M/D" for the chart's axis labels.
const shortDate = (date) => {
  if (!date) return ''
  // A date-only string such as "2026-06-11" is parsed by `new Date()` as UTC
  // midnight, while getMonth()/getDate() read local time, so the label would
  // slip to the previous day in timezones behind UTC. Read the calendar
  // fields straight from the string instead.
  const m = /^(\d{4})-(\d{2})-(\d{2})$/.exec(String(date))
  if (m) return `${Number(m[2])}/${Number(m[3])}`
  // Anything else (full timestamps, Date objects) keeps the original path.
  const d = new Date(date)
  return `${d.getMonth() + 1}/${d.getDate()}`
}
const loadCategories = async () => { const loadCategories = async () => {
try { try {
const feeds = await feedsApi.list({ limit: 1000 }) const feeds = await feedsApi.list({ limit: 1000 })
@@ -203,6 +248,7 @@ onMounted(() => {
loadStats() loadStats()
loadHealth() loadHealth()
loadActivity() loadActivity()
loadDaily()
loadCategories() loadCategories()
}) })
</script> </script>
@@ -250,6 +296,50 @@ onMounted(() => {
overflow: hidden; overflow: hidden;
} }
/* 每日文章柱状图 */
.daily-chart {
display: flex;
align-items: flex-end;
gap: 6px;
padding: 20px 16px 8px;
height: 200px;
overflow-x: auto;
}
.daily-bar-wrap {
display: flex;
flex-direction: column;
align-items: center;
flex: 1;
min-width: 32px;
}
.daily-bar {
width: 100%;
max-width: 40px;
background: linear-gradient(180deg, #63b3ed, #3182ce);
border-radius: 3px 3px 0 0;
position: relative;
min-height: 4px;
}
.daily-count {
position: absolute;
top: -18px;
left: 50%;
transform: translateX(-50%);
font-size: 11px;
color: var(--text-secondary);
white-space: nowrap;
}
.daily-date {
font-size: 10px;
color: var(--text-secondary);
margin-top: 4px;
white-space: nowrap;
}
.dark-card-header { .dark-card-header {
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;