feat: 代理支持、外部API增强、调度器修复、每日文章看板

- 添加 HTTP 代理支持(国内直连、外网走代理)
- 外部 API 新增全文搜索、源健康度/错误筛选、未读筛选
- 修复 APScheduler 线程静默崩溃(_safe_fetch 异常保护)
- 健康检查暴露调度器状态
- Dashboard 新增每日文章数柱状图(按 published_at)
- 文章列表 API 补上 content 字段,日期筛选修复时间范围
- 修复外部 API 双重 external 前缀
- User-Agent 改为 Chrome 标识缓解 403
- 添加完整 API 接口文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
congsh
2026-06-12 09:58:32 +08:00
parent 68bba3d9e0
commit 4286731348
12 changed files with 1057 additions and 44 deletions
+31 -7
View File
@@ -1,9 +1,12 @@
"""APScheduler 定时任务管理"""
import logging
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from rss_fetcher import fetch_and_store_feed
import config
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Scheduler singleton; stays None until get_scheduler() creates it.
_scheduler = None
@@ -11,32 +14,42 @@ def get_scheduler():
"""获取或创建调度器实例"""
global _scheduler
if _scheduler is None:
_scheduler = BackgroundScheduler()
_scheduler = BackgroundScheduler(
job_defaults={
"coalesce": True,
"max_instances": 1,
"misfire_grace_time": 300,
},
logger=logger,
)
return _scheduler
def _safe_fetch(feed_id: int):
    """Fetch one feed, logging any exception instead of propagating it.

    Used as the scheduled job callable so that a failure while fetching a
    single feed is recorded in the log rather than raised to the caller.

    Args:
        feed_id: Identifier passed through to ``fetch_and_store_feed``.
    """
    try:
        fetch_and_store_feed(feed_id)
    except Exception as e:
        # Deliberate catch-all at the job boundary. exc_info=True keeps the
        # traceback in the log; the rendered message text is unchanged.
        logger.error("调度抓取失败 feed_id=%s: %s", feed_id, e, exc_info=True)
def add_feed_job(feed_id: int, interval_minutes: int):
    """Register or update the periodic fetch job for one RSS feed.

    Args:
        feed_id: Identifier of the feed; also used to build the job id
            ``fetch_feed_<feed_id>``.
        interval_minutes: Requested fetch interval in minutes. Values below
            ``config.MIN_FETCH_INTERVAL`` are raised to that minimum.
    """
    scheduler = get_scheduler()
    job_id = f"fetch_feed_{feed_id}"

    # Never poll more often than the configured minimum interval.
    interval = max(interval_minutes, config.MIN_FETCH_INTERVAL)
    trigger = IntervalTrigger(minutes=interval)

    # If the job already exists, only its trigger needs to be replaced.
    existing = scheduler.get_job(job_id)
    if existing:
        existing.reschedule(trigger=trigger)
        return

    scheduler.add_job(
        _safe_fetch,  # wrapper that logs fetch errors instead of raising
        trigger=trigger,
        id=job_id,
        args=[feed_id],
        replace_existing=True,
        misfire_grace_time=300,  # tolerate up to 5 minutes of lateness
        coalesce=True,  # collapse several missed runs into one
    )
@@ -55,6 +68,7 @@ def start_scheduler():
scheduler = get_scheduler()
if not scheduler.running:
scheduler.start()
logger.info("调度器已启动")
def stop_scheduler():
@@ -65,6 +79,15 @@ def stop_scheduler():
_scheduler = None
def scheduler_status():
    """Report the scheduler state (used by the health check).

    Returns:
        A dict with ``"running"`` (bool) and ``"jobs"`` (int). When the
        scheduler is not running the job count is reported as 0.
    """
    sched = get_scheduler()
    if sched.running:
        return {"running": True, "jobs": len(sched.get_jobs())}
    return {"running": False, "jobs": 0}
def get_feed_next_run(feed_id: int):
"""获取指定 RSS 源的下一次抓取时间"""
scheduler = get_scheduler()
@@ -81,3 +104,4 @@ def init_feed_jobs(db):
for feed in feeds:
add_feed_job(feed.id, feed.fetch_interval_minutes or config.DEFAULT_FETCH_INTERVAL)
start_scheduler()
logger.info(f"已注册 {len(feeds)} 个定时抓取任务")