Files
rssKeeper/backend/scheduler.py
T
congsh 4286731348 feat: 代理支持、外部API增强、调度器修复、每日文章看板
- 添加 HTTP 代理支持(国内直连、外网走代理)
- 外部 API 新增全文搜索、源健康度/错误筛选、未读筛选
- 修复 APScheduler 线程静默崩溃(_safe_fetch 异常保护)
- 健康检查暴露调度器状态
- Dashboard 新增每日文章数柱状图(按 published_at)
- 文章列表 API 补上 content 字段,日期筛选修复时间范围
- 修复外部 API 双重 external 前缀
- User-Agent 改为 Chrome 标识缓解 403
- 添加完整 API 接口文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-12 09:58:32 +08:00

108 lines
2.9 KiB
Python

"""APScheduler 定时任务管理"""
import logging
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
from rss_fetcher import fetch_and_store_feed
import config
# Module-level logger; it is also handed to the scheduler in get_scheduler().
logger = logging.getLogger(__name__)
# Cached scheduler instance. Stays None until get_scheduler() creates it, and
# is reset to None again by stop_scheduler().
_scheduler = None
def get_scheduler():
    """Return the module-wide scheduler, creating it on first use.

    The instance is cached in the module-level ``_scheduler`` variable, so
    repeated calls return the same object until that variable is reset to
    ``None``.

    Returns:
        The shared ``BackgroundScheduler`` instance.
    """
    global _scheduler
    if _scheduler is not None:
        return _scheduler

    # Defaults passed to the scheduler as ``job_defaults``; see the APScheduler
    # documentation for the exact meaning of each option.
    defaults = {
        "coalesce": True,
        "max_instances": 1,
        "misfire_grace_time": 300,
    }
    _scheduler = BackgroundScheduler(job_defaults=defaults, logger=logger)
    return _scheduler
def _safe_fetch(feed_id: int):
    """Run one feed fetch, logging any failure instead of propagating it.

    This is the callable registered as the scheduled job, so an exception
    raised by a single fetch is recorded here rather than raised out of the
    job.

    Args:
        feed_id: Identifier of the feed, passed to ``fetch_and_store_feed``.
    """
    try:
        fetch_and_store_feed(feed_id)
    except Exception as e:
        # Deliberately broad: this is the job boundary. exc_info=True keeps
        # the traceback, which the message text alone would lose.
        logger.error(f"调度抓取失败 feed_id={feed_id}: {e}", exc_info=True)
def add_feed_job(feed_id: int, interval_minutes: int):
    """Schedule, or reschedule, the periodic fetch job for one feed.

    Args:
        feed_id: Feed identifier; also used to build the job id
            ``fetch_feed_<feed_id>``.
        interval_minutes: Requested interval in minutes. Values below
            ``config.MIN_FETCH_INTERVAL`` are raised to that minimum.
    """
    sched = get_scheduler()
    job_key = f"fetch_feed_{feed_id}"
    # Clamp the requested interval to the configured lower bound.
    minutes = max(interval_minutes, config.MIN_FETCH_INTERVAL)

    current = sched.get_job(job_key)
    if not current:
        # No job registered for this feed yet: create it.
        sched.add_job(
            _safe_fetch,
            trigger=IntervalTrigger(minutes=minutes),
            id=job_key,
            args=[feed_id],
            replace_existing=True,
        )
        return

    # A job already exists for this feed: only its trigger is replaced.
    current.reschedule(trigger=IntervalTrigger(minutes=minutes))
def remove_feed_job(feed_id: int):
    """Remove the periodic fetch job for one feed, if it exists.

    Removal is best-effort: errors raised by ``remove_job`` are not
    propagated. A missing job is treated as already removed; any other
    failure is logged with its traceback so it is no longer silently lost.

    Args:
        feed_id: Feed identifier used to build the job id
            ``fetch_feed_<feed_id>``.
    """
    # Function-scope import so this block brings its own dependency into scope.
    from apscheduler.jobstores.base import JobLookupError

    scheduler = get_scheduler()
    job_id = f"fetch_feed_{feed_id}"
    try:
        scheduler.remove_job(job_id)
    except JobLookupError:
        # Expected when the feed never had a job or it was already removed.
        logger.debug("No scheduled job to remove: %s", job_id)
    except Exception:
        # Still best-effort, but leave a trace instead of hiding the error.
        logger.warning("Failed to remove scheduled job %s", job_id, exc_info=True)
def start_scheduler():
    """Start the shared scheduler unless it is already running."""
    sched = get_scheduler()
    if sched.running:
        return
    sched.start()
    logger.info("调度器已启动")
def stop_scheduler():
    """Shut down the shared scheduler and drop the cached instance.

    Does nothing when no scheduler has been created or it is not running.
    After a successful stop the module-level ``_scheduler`` is reset to
    ``None``, so the next ``get_scheduler()`` call builds a fresh instance.
    """
    global _scheduler
    if not (_scheduler and _scheduler.running):
        return
    # wait=False is passed straight through to the scheduler's shutdown().
    _scheduler.shutdown(wait=False)
    _scheduler = None
def scheduler_status():
    """Report scheduler state (the original docstring says this is for health checks).

    Returns:
        A dict with ``"running"`` (bool) and ``"jobs"`` (int). ``"jobs"`` is
        the number of jobs returned by the scheduler while it is running,
        and ``0`` when it is not.
    """
    sched = get_scheduler()
    if sched.running:
        return {"running": True, "jobs": len(sched.get_jobs())}
    return {"running": False, "jobs": 0}
def get_feed_next_run(feed_id: int):
    """Return the next scheduled fetch time for one feed.

    Args:
        feed_id: Feed identifier used to build the job id
            ``fetch_feed_<feed_id>``.

    Returns:
        The job's ``next_run_time``, or ``None`` when the scheduler is not
        running or no job is registered for the feed.
    """
    sched = get_scheduler()
    if sched.running:
        feed_job = sched.get_job(f"fetch_feed_{feed_id}")
        if feed_job:
            return feed_job.next_run_time
    return None
def init_feed_jobs(db):
    """Register a fetch job for every active feed, then start the scheduler.

    Args:
        db: Database session exposing ``query()``; presumably a SQLAlchemy
            session -- confirm against the caller.
    """
    # Imported inside the function, as in the original code.
    from models import Feed

    active_feeds = db.query(Feed).filter(Feed.is_active == True).all()  # noqa: E712
    for item in active_feeds:
        # Fall back to the configured default when the feed has no interval.
        minutes = item.fetch_interval_minutes or config.DEFAULT_FETCH_INTERVAL
        add_feed_job(item.id, minutes)

    start_scheduler()
    logger.info(f"已注册 {len(active_feeds)} 个定时抓取任务")