feat: 深色主题UI、错误分类、批量抓取、健康度筛选

- 修复 datetime 时区不一致导致所有 API 返回 500 错误的问题
- Feeds/Dashboard 页面改为深色表格主题,高对比度文字
- 添加错误类型自动分类(URL失效/被拒绝/超时/DNS失败/SSL错误等12种)
- 新增"下次抓取时间"列,从APScheduler获取
- 新增健康度筛选下拉,修复分页后过滤失效的bug
- "全部抓取"改为同步并发执行,基于当前筛选条件获取所有匹配源
- 新增数据库自动迁移机制,处理增量列变更

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
congsh
2026-06-11 17:44:54 +08:00
parent c59dd304f7
commit 68bba3d9e0
12 changed files with 846 additions and 192 deletions
+39
View File
@@ -35,9 +35,48 @@ def init_db():
from models import Feed, Article, FetchLog # noqa
Base.metadata.create_all(bind=engine)
_migrate(engine)
init_fts5()
def _migrate(engine):
    """Apply incremental schema migrations (adding new columns).

    For every ``(table, column, definition)`` entry, the column is added with
    ``ALTER TABLE ... ADD COLUMN`` when ``PRAGMA table_info`` does not list it
    yet. Afterwards, feeds that have a ``last_error`` but no ``error_type``
    are back-filled with ``classify_error``.

    The connection and cursor are always closed, even when a statement or
    the ``rss_fetcher`` import raises.

    Args:
        engine: Object exposing ``raw_connection()`` that returns a DB-API
            connection. The ``PRAGMA`` statement and ``?`` placeholders used
            here are SQLite syntax.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Table and column names are hard-coded constants, never user input, so
    # interpolating them into the SQL text below is safe.
    migrations = [
        ("feeds", "error_type", "VARCHAR(32) DEFAULT ''"),
    ]

    conn = engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            # Look up the existing columns of each table that has a migration,
            # so entries for tables other than "feeds" are checked correctly.
            columns_by_table = {}
            for table, column, col_type in migrations:
                if table not in columns_by_table:
                    cursor.execute(f"PRAGMA table_info({table})")
                    # Index 1 of each table_info row is the column name.
                    columns_by_table[table] = {row[1] for row in cursor.fetchall()}
                if column in columns_by_table[table]:
                    continue
                statement = f"ALTER TABLE {table} ADD COLUMN {column} {col_type}"
                logger.info("迁移: %s", statement)
                cursor.execute(statement)
                conn.commit()
                columns_by_table[table].add(column)

            # Classify errors recorded before the error_type column existed.
            # NOTE(review): imported lazily, presumably because rss_fetcher
            # itself imports from the database module — confirm.
            from rss_fetcher import classify_error

            cursor.execute(
                "SELECT id, last_error FROM feeds WHERE last_error != '' AND (error_type IS NULL OR error_type = '')"
            )
            classified = 0
            for feed_id, error in cursor.fetchall():
                etype = classify_error(error)
                if etype:
                    cursor.execute(
                        "UPDATE feeds SET error_type = ? WHERE id = ?",
                        (etype, feed_id),
                    )
                    classified += 1
            if classified:
                conn.commit()
                # Report the rows actually updated, not merely selected.
                logger.info("迁移: 已分类 %d 条历史错误", classified)
        finally:
            cursor.close()
    finally:
        conn.close()
def init_fts5():
"""初始化 FTS5 全文搜索虚拟表"""
conn = engine.raw_connection()
+3 -3
View File
@@ -1,5 +1,5 @@
"""RSS 源健康度检测"""
from datetime import datetime, timedelta, timezone
from datetime import datetime, timedelta
from typing import List, Dict
from sqlalchemy import func
from sqlalchemy.orm import Session
@@ -10,7 +10,7 @@ def get_feed_health(db: Session, feed_id: int = None) -> List[Dict]:
"""获取 RSS 源健康度信息
返回每个源的健康状态详情
"""
now = datetime.now(timezone.utc)
now = datetime.utcnow()
query = db.query(Feed)
if feed_id:
query = query.filter(Feed.id == feed_id)
@@ -84,7 +84,7 @@ def get_overall_stats(db: Session) -> Dict:
# 健康源统计
feeds = db.query(Feed).all()
healthy = warning = unhealthy = 0
now = datetime.now(timezone.utc)
now = datetime.utcnow()
for feed in feeds:
status = feed.health_status(now=now)
if status == "healthy":
+2 -21
View File
@@ -3,7 +3,6 @@ import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from starlette.middleware.cors import CORSMiddleware
from database import init_db, SessionLocal
from scheduler import init_feed_jobs, stop_scheduler
@@ -61,25 +60,7 @@ def health_check():
return {"status": "ok", "service": "rssKeeper"}
# 静态文件服务(前端构建产物)
# 静态文件服务(前端构建产物)— 必须放在最后,API 路由优先匹配
static_dir = os.path.join(config.BASE_DIR, "static")
if os.path.exists(static_dir):
app.mount("/static", StaticFiles(directory=static_dir), name="static")
# API 路径白名单 — 这些路径不应被 SPA 兜底
_API_PATHS = {
"api", "docs", "openapi.json", "redoc",
}
@app.get("/{full_path:path}")
async def serve_spa(full_path: str):
    """Serve the SPA entry page for any path that is not an API/docs path."""
    # Only the first path segment decides whether the request is excluded
    # from the SPA fallback.
    head, _, _ = (full_path or "").partition("/")
    if head in _API_PATHS:
        return {"detail": "Not found"}

    index_path = os.path.join(static_dir, "index.html")
    if not os.path.exists(index_path):
        return {"detail": "Frontend not built"}
    return FileResponse(index_path)
app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
+7 -5
View File
@@ -1,5 +1,5 @@
"""SQLAlchemy 数据模型"""
from datetime import datetime, timezone
from datetime import datetime
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, ForeignKey
from sqlalchemy.orm import relationship
from database import Base
@@ -21,11 +21,12 @@ class Feed(Base):
last_fetch_at = Column(DateTime, nullable=True)
last_fetch_status = Column(String(20), default="")
last_error = Column(Text, default="")
error_type = Column(String(32), default="")
success_count = Column(Integer, default=0)
fail_count = Column(Integer, default=0)
article_count = Column(Integer, default=0)
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
created_at = Column(DateTime, default=datetime.utcnow)
# 关联
articles = relationship("Article", back_populates="feed", cascade="all, delete-orphan")
@@ -36,6 +37,7 @@ class Feed(Base):
🟢 健康: 成功率 >= 90%, 最近7天有更新
🟡 警告: 成功率 50%-90%, 或超过3天未更新
🔴 异常: 成功率 < 50%, 或超过7天未更新
⚪ 未知: 尚未进行过任何抓取
"""
total = self.success_count + self.fail_count
if total == 0:
@@ -44,7 +46,7 @@ class Feed(Base):
success_rate = self.success_count / total
if now is None:
now = datetime.now(timezone.utc)
now = datetime.utcnow()
days_since_last_fetch = None
if self.last_fetch_at:
@@ -71,7 +73,7 @@ class Article(Base):
content = Column(Text, default="")
summary = Column(Text, default="")
is_read = Column(Boolean, default=False)
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
created_at = Column(DateTime, default=datetime.utcnow, index=True)
# 关联
feed = relationship("Feed", back_populates="articles")
@@ -87,7 +89,7 @@ class FetchLog(Base):
articles_fetched = Column(Integer, default=0)
error_message = Column(Text, default="")
response_time_ms = Column(Integer, nullable=True)
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
created_at = Column(DateTime, default=datetime.utcnow, index=True)
# 关联
feed = relationship("Feed", back_populates="fetch_logs")
+3 -3
View File
@@ -1,6 +1,6 @@
"""对外 API(供 AI/外部系统调用)"""
from typing import Optional
from datetime import datetime, timedelta, timezone
from datetime import datetime, timedelta
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from sqlalchemy import desc
@@ -21,7 +21,7 @@ def get_recent_articles(
"""获取最近 N 小时的文章
这是对外提供给 AI 分析的主要接口
"""
since = datetime.now(timezone.utc) - timedelta(hours=hours)
since = datetime.utcnow() - timedelta(hours=hours)
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
@@ -136,7 +136,7 @@ def get_daily_summary(
except ValueError:
return {"error": "Invalid date format, use YYYY-MM-DD"}
else:
day = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
day = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
next_day = day + timedelta(days=1)
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
+38 -4
View File
@@ -5,8 +5,8 @@ from pydantic import BaseModel, HttpUrl
from sqlalchemy.orm import Session
from database import get_db
from models import Feed
from rss_fetcher import discover_feed_url, fetch_and_store_feed
from scheduler import add_feed_job, remove_feed_job
from rss_fetcher import discover_feed_url, fetch_and_store_feed, fetch_all_feeds
from scheduler import add_feed_job, remove_feed_job, get_feed_next_run
router = APIRouter(prefix="/feeds", tags=["feeds"])
@@ -55,9 +55,10 @@ def list_feeds(
category: Optional[str] = None,
search: Optional[str] = None,
is_active: Optional[bool] = None,
health_status: Optional[str] = None,
db: Session = Depends(get_db),
):
"""获取 RSS 源列表,支持分页、分类筛选、搜索"""
"""获取 RSS 源列表,支持分页、分类筛选、搜索、健康度筛选"""
query = db.query(Feed)
if category:
@@ -70,10 +71,22 @@ def list_feeds(
)
total = query.count()
feeds = query.order_by(Feed.created_at.desc()).offset(skip).limit(limit).all()
# 健康度是计算字段,需要在 Python 中过滤
if health_status:
all_feeds = query.order_by(Feed.created_at.desc()).all()
matched = []
for feed in all_feeds:
if feed.health_status() == health_status:
matched.append(feed)
total = len(matched)
feeds = matched[skip:skip + limit]
else:
feeds = query.order_by(Feed.created_at.desc()).offset(skip).limit(limit).all()
results = []
for feed in feeds:
next_run = get_feed_next_run(feed.id)
data = {
"id": feed.id,
"url": feed.url,
@@ -84,10 +97,13 @@ def list_feeds(
"fetch_interval_minutes": feed.fetch_interval_minutes,
"last_fetch_at": feed.last_fetch_at.isoformat() if feed.last_fetch_at else None,
"last_fetch_status": feed.last_fetch_status,
"last_error": feed.last_error,
"error_type": feed.error_type,
"success_count": feed.success_count,
"fail_count": feed.fail_count,
"article_count": feed.article_count,
"health_status": feed.health_status(),
"next_fetch_time": next_run.isoformat() if next_run else None,
"created_at": feed.created_at.isoformat(),
}
results.append(data)
@@ -210,6 +226,24 @@ def delete_feed(feed_id: int, db: Session = Depends(get_db)):
return {"message": "RSS 源已删除"}
class BatchFetchRequest(BaseModel):
    # Request body of the "/batch-fetch" route: ids of the feeds to fetch.
    feed_ids: List[int]
@router.post("/batch-fetch")
def batch_fetch(data: BatchFetchRequest):
    """Fetch the requested feeds and report how many succeeded and failed.

    The call blocks until ``fetch_all_feeds`` returns; a result counts as a
    success when its ``"success"`` entry is truthy.
    """
    outcomes = fetch_all_feeds(data.feed_ids)
    total = len(outcomes)
    success = len([outcome for outcome in outcomes if outcome.get("success")])
    fail = total - success
    summary = {
        "message": f"完成:{success} 个成功,{fail} 个失败",
        "total": total,
        "success": success,
        "fail": fail,
    }
    return summary
@router.post("/{feed_id}/fetch")
def trigger_fetch(feed_id: int, db: Session = Depends(get_db)):
"""手动触发抓取"""
+40 -5
View File
@@ -2,7 +2,7 @@
import time
import re
import html
from datetime import datetime, timezone
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urljoin
import requests
@@ -14,6 +14,39 @@ from database import SessionLocal
import config
def classify_error(error: str) -> str:
    """Map a raw fetch error message to a coarse error-type label.

    Checks run in a fixed order and the first match wins, so a wrapped
    message such as "Max retries exceeded ... Connection refused" is
    reported by its more specific cause.

    Args:
        error: Error text recorded for a failed fetch; may be empty or None.

    Returns:
        "" when ``error`` is empty, otherwise one of "url_invalid",
        "forbidden", "rate_limited", "timeout", "dns_failure",
        "connection_refused", "connection_reset", "ssl_error",
        "unreachable", "url_malformed", "server_error" or "unknown".
    """
    if not error:
        return ""
    err = error.lower()

    def has_status(code: str) -> bool:
        # Digit lookarounds keep "404" from matching inside a longer number,
        # e.g. an item id "14043" embedded in the failing URL.
        return re.search(rf"(?<!\d){code}(?!\d)", error) is not None

    def mentions(*needles: str) -> bool:
        return any(needle in err for needle in needles)

    if has_status("404") or mentions("not found"):
        return "url_invalid"
    if has_status("403") or mentions("forbidden"):
        return "forbidden"
    if has_status("429") or mentions("too many request"):
        return "rate_limited"
    # "timeout" also covers exception names such as ConnectTimeout.
    if mentions("timeout", "timed out"):
        return "timeout"
    if mentions(
        "could not resolve",
        "name or service not known",
        "nodename nor servname",
        "temporary failure in name resolution",
        "getaddrinfo failed",
    ):
        return "dns_failure"
    if mentions("connection refused"):
        return "connection_refused"
    if mentions("connection aborted", "remotedisconnected", "remote end closed"):
        return "connection_reset"
    if mentions("ssl", "certificate", "certifi"):
        return "ssl_error"
    # Generic wrappers are checked after the specific causes they may contain.
    if mentions("max retries", "newconnectionerror"):
        return "unreachable"
    if mentions("invalid url", "no host", "missing scheme"):
        return "url_malformed"
    # Require a standalone 5xx status code, not just any "5" in the text.
    if has_status(r"5\d\d") and mentions("server error"):
        return "server_error"
    return "unknown"
def fetch_feed(url: str, timeout: int = config.FETCH_TIMEOUT) -> dict:
"""抓取单个 RSS 源
返回 {"success": bool, "feed_data": parsed, "error": str, "response_time_ms": int}
@@ -102,12 +135,12 @@ def parse_article(entry, feed_id: int) -> dict:
published_at = None
if hasattr(entry, "published_parsed") and entry.published_parsed:
try:
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc)
published_at = datetime(*entry.published_parsed[:6])
except (ValueError, TypeError):
pass
if not published_at and hasattr(entry, "updated_parsed") and entry.updated_parsed:
try:
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc)
published_at = datetime(*entry.updated_parsed[:6])
except (ValueError, TypeError):
pass
@@ -199,9 +232,10 @@ def fetch_and_store_feed(feed_id: int) -> dict:
if not result["success"]:
# 记录失败
feed.last_fetch_at = datetime.now(timezone.utc)
feed.last_fetch_at = datetime.utcnow()
feed.last_fetch_status = "fail"
feed.last_error = result["error"]
feed.error_type = classify_error(result["error"])
feed.fail_count += 1
log = FetchLog(
@@ -264,9 +298,10 @@ def fetch_and_store_feed(feed_id: int) -> dict:
existing.published_at = article_data["published_at"]
# 更新 feed 统计
feed.last_fetch_at = datetime.now(timezone.utc)
feed.last_fetch_at = datetime.utcnow()
feed.last_fetch_status = "success"
feed.last_error = ""
feed.error_type = ""
feed.success_count += 1
feed.article_count += new_count
+9
View File
@@ -65,6 +65,15 @@ def stop_scheduler():
_scheduler = None
def get_feed_next_run(feed_id: int):
    """Return the next run time of the feed's fetch job.

    Yields None when the scheduler is not running or when no job with id
    ``fetch_feed_<feed_id>`` is registered.
    """
    sched = get_scheduler()
    if sched.running:
        entry = sched.get_job(f"fetch_feed_{feed_id}")
        if entry:
            return entry.next_run_time
    return None
def init_feed_jobs(db):
"""从数据库加载所有活跃 RSS 源并注册定时任务"""
from models import Feed