feat: 深色主题UI、错误分类、批量抓取、健康度筛选
- 修复 datetime 时区不一致导致所有API 500错误的问题
- Feeds/Dashboard 页面改为深色表格主题,高对比度文字
- 添加错误类型自动分类(URL失效/被拒绝/超时/DNS失败/SSL错误等12种)
- 新增"下次抓取时间"列,从APScheduler获取
- 新增健康度筛选下拉,修复分页后过滤失效的bug
- "全部抓取"改为同步并发执行,基于当前筛选条件获取所有匹配源
- 新增数据库自动迁移机制,处理增量列变更

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -35,9 +35,48 @@ def init_db():
|
||||
from models import Feed, Article, FetchLog # noqa
|
||||
|
||||
Base.metadata.create_all(bind=engine)
|
||||
_migrate(engine)
|
||||
init_fts5()
|
||||
|
||||
|
||||
def _migrate(engine):
    """Apply incremental schema migrations and backfill ``feeds.error_type``.

    Two steps run on a raw DB-API connection obtained from ``engine``:

    1. Every ``(table, column, ddl)`` entry in ``migrations`` whose column is
       missing from its table is added with ``ALTER TABLE ... ADD COLUMN``.
       Existing columns are looked up per table via ``PRAGMA table_info``.
    2. Rows in ``feeds`` that carry a ``last_error`` but no ``error_type`` are
       classified with ``rss_fetcher.classify_error`` and updated.

    Args:
        engine: Object exposing ``raw_connection()`` that returns a DB-API
            connection (``cursor()``, ``commit()``, ``close()``).

    Raises:
        Any driver error raised by the executed statements propagates to the
        caller; the cursor and connection are closed in every case.
    """
    import logging

    logger = logging.getLogger(__name__)

    # (table, column, column DDL). Append new entries here for future columns.
    migrations = [
        ("feeds", "error_type", "VARCHAR(32) DEFAULT ''"),
    ]

    conn = engine.raw_connection()
    try:
        cursor = conn.cursor()
        try:
            # Column names are cached per table so an entry for another table
            # is never compared against the columns of ``feeds``.
            known_columns = {}
            for table, column, col_type in migrations:
                if table not in known_columns:
                    cursor.execute(f"PRAGMA table_info({table})")
                    # Index 1 of each table_info row is the column name.
                    known_columns[table] = {row[1] for row in cursor.fetchall()}
                if column in known_columns[table]:
                    continue
                logger.info(f"迁移: ALTER TABLE {table} ADD COLUMN {column} {col_type}")
                cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}")
                known_columns[table].add(column)

            conn.commit()

            # Backfill: classify errors recorded before error_type existed.
            cursor.execute("SELECT id, last_error FROM feeds WHERE last_error != '' AND (error_type IS NULL OR error_type = '')")
            rows = cursor.fetchall()
            if rows:
                # Imported only when there is something to classify; kept at
                # function scope as in the original code.
                from rss_fetcher import classify_error

                classified = 0
                for feed_id, error in rows:
                    etype = classify_error(error)
                    if etype:
                        cursor.execute("UPDATE feeds SET error_type = ? WHERE id = ?", (etype, feed_id))
                        classified += 1
                conn.commit()
                # Report the rows actually updated, not merely selected.
                logger.info(f"迁移: 已分类 {classified} 条历史错误")
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a statement fails.
        conn.close()
|
||||
|
||||
|
||||
def init_fts5():
|
||||
"""初始化 FTS5 全文搜索虚拟表"""
|
||||
conn = engine.raw_connection()
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
"""RSS 源健康度检测"""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import datetime, timedelta
|
||||
from typing import List, Dict
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -10,7 +10,7 @@ def get_feed_health(db: Session, feed_id: int = None) -> List[Dict]:
|
||||
"""获取 RSS 源健康度信息
|
||||
返回每个源的健康状态详情
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = datetime.utcnow()
|
||||
query = db.query(Feed)
|
||||
if feed_id:
|
||||
query = query.filter(Feed.id == feed_id)
|
||||
@@ -84,7 +84,7 @@ def get_overall_stats(db: Session) -> Dict:
|
||||
# 健康源统计
|
||||
feeds = db.query(Feed).all()
|
||||
healthy = warning = unhealthy = 0
|
||||
now = datetime.now(timezone.utc)
|
||||
now = datetime.utcnow()
|
||||
for feed in feeds:
|
||||
status = feed.health_status(now=now)
|
||||
if status == "healthy":
|
||||
|
||||
+2
-21
@@ -3,7 +3,6 @@ import os
|
||||
from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.responses import FileResponse
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
from database import init_db, SessionLocal
|
||||
from scheduler import init_feed_jobs, stop_scheduler
|
||||
@@ -61,25 +60,7 @@ def health_check():
|
||||
return {"status": "ok", "service": "rssKeeper"}
|
||||
|
||||
|
||||
# 静态文件服务(前端构建产物)
|
||||
# 静态文件服务(前端构建产物)— 必须放在最后,API 路由优先匹配
|
||||
static_dir = os.path.join(config.BASE_DIR, "static")
|
||||
if os.path.exists(static_dir):
|
||||
app.mount("/static", StaticFiles(directory=static_dir), name="static")
|
||||
|
||||
# API 路径白名单 — 这些路径不应被 SPA 兜底
|
||||
_API_PATHS = {
|
||||
"api", "docs", "openapi.json", "redoc",
|
||||
}
|
||||
|
||||
@app.get("/{full_path:path}")
|
||||
async def serve_spa(full_path: str):
|
||||
"""Vue SPA 路由回退"""
|
||||
# API/文档路由不走 SPA 兜底
|
||||
first_seg = full_path.split("/")[0] if full_path else ""
|
||||
if first_seg in _API_PATHS:
|
||||
return {"detail": "Not found"}
|
||||
|
||||
index_path = os.path.join(static_dir, "index.html")
|
||||
if os.path.exists(index_path):
|
||||
return FileResponse(index_path)
|
||||
return {"detail": "Frontend not built"}
|
||||
app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
|
||||
|
||||
+7
-5
@@ -1,5 +1,5 @@
|
||||
"""SQLAlchemy 数据模型"""
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime
|
||||
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
from database import Base
|
||||
@@ -21,11 +21,12 @@ class Feed(Base):
|
||||
last_fetch_at = Column(DateTime, nullable=True)
|
||||
last_fetch_status = Column(String(20), default="")
|
||||
last_error = Column(Text, default="")
|
||||
error_type = Column(String(32), default="")
|
||||
success_count = Column(Integer, default=0)
|
||||
fail_count = Column(Integer, default=0)
|
||||
article_count = Column(Integer, default=0)
|
||||
|
||||
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
||||
created_at = Column(DateTime, default=datetime.utcnow)
|
||||
|
||||
# 关联
|
||||
articles = relationship("Article", back_populates="feed", cascade="all, delete-orphan")
|
||||
@@ -36,6 +37,7 @@ class Feed(Base):
|
||||
🟢 健康: 成功率 >= 90%, 最近7天有更新
|
||||
🟡 警告: 成功率 50%-90%, 或超过3天未更新
|
||||
🔴 异常: 成功率 < 50%, 或超过7天未更新
|
||||
⚪ 未知: 尚未进行过任何抓取
|
||||
"""
|
||||
total = self.success_count + self.fail_count
|
||||
if total == 0:
|
||||
@@ -44,7 +46,7 @@ class Feed(Base):
|
||||
success_rate = self.success_count / total
|
||||
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
now = datetime.utcnow()
|
||||
|
||||
days_since_last_fetch = None
|
||||
if self.last_fetch_at:
|
||||
@@ -71,7 +73,7 @@ class Article(Base):
|
||||
content = Column(Text, default="")
|
||||
summary = Column(Text, default="")
|
||||
is_read = Column(Boolean, default=False)
|
||||
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, index=True)
|
||||
|
||||
# 关联
|
||||
feed = relationship("Feed", back_populates="articles")
|
||||
@@ -87,7 +89,7 @@ class FetchLog(Base):
|
||||
articles_fetched = Column(Integer, default=0)
|
||||
error_message = Column(Text, default="")
|
||||
response_time_ms = Column(Integer, nullable=True)
|
||||
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, index=True)
|
||||
|
||||
# 关联
|
||||
feed = relationship("Feed", back_populates="fetch_logs")
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""对外 API(供 AI/外部系统调用)"""
|
||||
from typing import Optional
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from datetime import datetime, timedelta
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import desc
|
||||
@@ -21,7 +21,7 @@ def get_recent_articles(
|
||||
"""获取最近 N 小时的文章
|
||||
这是对外提供给 AI 分析的主要接口
|
||||
"""
|
||||
since = datetime.now(timezone.utc) - timedelta(hours=hours)
|
||||
since = datetime.utcnow() - timedelta(hours=hours)
|
||||
|
||||
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
|
||||
|
||||
@@ -136,7 +136,7 @@ def get_daily_summary(
|
||||
except ValueError:
|
||||
return {"error": "Invalid date format, use YYYY-MM-DD"}
|
||||
else:
|
||||
day = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
day = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
next_day = day + timedelta(days=1)
|
||||
|
||||
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
|
||||
|
||||
@@ -5,8 +5,8 @@ from pydantic import BaseModel, HttpUrl
|
||||
from sqlalchemy.orm import Session
|
||||
from database import get_db
|
||||
from models import Feed
|
||||
from rss_fetcher import discover_feed_url, fetch_and_store_feed
|
||||
from scheduler import add_feed_job, remove_feed_job
|
||||
from rss_fetcher import discover_feed_url, fetch_and_store_feed, fetch_all_feeds
|
||||
from scheduler import add_feed_job, remove_feed_job, get_feed_next_run
|
||||
|
||||
router = APIRouter(prefix="/feeds", tags=["feeds"])
|
||||
|
||||
@@ -55,9 +55,10 @@ def list_feeds(
|
||||
category: Optional[str] = None,
|
||||
search: Optional[str] = None,
|
||||
is_active: Optional[bool] = None,
|
||||
health_status: Optional[str] = None,
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""获取 RSS 源列表,支持分页、分类筛选、搜索"""
|
||||
"""获取 RSS 源列表,支持分页、分类筛选、搜索、健康度筛选"""
|
||||
query = db.query(Feed)
|
||||
|
||||
if category:
|
||||
@@ -70,10 +71,22 @@ def list_feeds(
|
||||
)
|
||||
|
||||
total = query.count()
|
||||
feeds = query.order_by(Feed.created_at.desc()).offset(skip).limit(limit).all()
|
||||
|
||||
# 健康度是计算字段,需要在 Python 中过滤
|
||||
if health_status:
|
||||
all_feeds = query.order_by(Feed.created_at.desc()).all()
|
||||
matched = []
|
||||
for feed in all_feeds:
|
||||
if feed.health_status() == health_status:
|
||||
matched.append(feed)
|
||||
total = len(matched)
|
||||
feeds = matched[skip:skip + limit]
|
||||
else:
|
||||
feeds = query.order_by(Feed.created_at.desc()).offset(skip).limit(limit).all()
|
||||
|
||||
results = []
|
||||
for feed in feeds:
|
||||
next_run = get_feed_next_run(feed.id)
|
||||
data = {
|
||||
"id": feed.id,
|
||||
"url": feed.url,
|
||||
@@ -84,10 +97,13 @@ def list_feeds(
|
||||
"fetch_interval_minutes": feed.fetch_interval_minutes,
|
||||
"last_fetch_at": feed.last_fetch_at.isoformat() if feed.last_fetch_at else None,
|
||||
"last_fetch_status": feed.last_fetch_status,
|
||||
"last_error": feed.last_error,
|
||||
"error_type": feed.error_type,
|
||||
"success_count": feed.success_count,
|
||||
"fail_count": feed.fail_count,
|
||||
"article_count": feed.article_count,
|
||||
"health_status": feed.health_status(),
|
||||
"next_fetch_time": next_run.isoformat() if next_run else None,
|
||||
"created_at": feed.created_at.isoformat(),
|
||||
}
|
||||
results.append(data)
|
||||
@@ -210,6 +226,24 @@ def delete_feed(feed_id: int, db: Session = Depends(get_db)):
|
||||
return {"message": "RSS 源已删除"}
|
||||
|
||||
|
||||
class BatchFetchRequest(BaseModel):
    """Request body carrying the feeds that a batch fetch should process."""

    # IDs of the feeds to fetch.
    feed_ids: List[int]
|
||||
|
||||
|
||||
@router.post("/batch-fetch")
def batch_fetch(data: BatchFetchRequest):
    """Fetch the requested feeds in one call and report a success/failure tally.

    Passes ``data.feed_ids`` to ``fetch_all_feeds`` and blocks until it
    returns. A result counts as successful when its ``"success"`` entry is
    truthy; every other result counts as a failure.

    Returns:
        dict with a human-readable ``message`` plus the ``total``,
        ``success`` and ``fail`` counts.
    """
    outcomes = fetch_all_feeds(data.feed_ids)
    total = len(outcomes)
    success = len([item for item in outcomes if item.get("success")])
    fail = total - success
    summary = f"完成:{success} 个成功,{fail} 个失败"
    return {"message": summary, "total": total, "success": success, "fail": fail}
|
||||
|
||||
|
||||
@router.post("/{feed_id}/fetch")
|
||||
def trigger_fetch(feed_id: int, db: Session = Depends(get_db)):
|
||||
"""手动触发抓取"""
|
||||
|
||||
+40
-5
@@ -2,7 +2,7 @@
|
||||
import time
|
||||
import re
|
||||
import html
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from urllib.parse import urljoin
|
||||
import requests
|
||||
@@ -14,6 +14,39 @@ from database import SessionLocal
|
||||
import config
|
||||
|
||||
|
||||
def classify_error(error: str) -> str:
    """Map a raw fetch error message to a coarse error category.

    Rules are checked in order and the first match wins, so the more specific
    causes (DNS failure, refused connection, SSL) are tested before the
    generic "max retries" wrapper text that often surrounds them.

    Args:
        error: Error text recorded for a failed fetch; may be empty or None.

    Returns:
        ``""`` for an empty error, otherwise one of ``"url_invalid"``,
        ``"forbidden"``, ``"rate_limited"``, ``"timeout"``, ``"dns_failure"``,
        ``"connection_refused"``, ``"connection_reset"``, ``"ssl_error"``,
        ``"unreachable"``, ``"url_malformed"``, ``"server_error"`` or
        ``"unknown"``.
    """
    if not error:
        return ""
    err = error.lower()

    def has_status(code: str) -> bool:
        # Match the code only as a standalone token, so digits embedded in a
        # longer token (a port such as 4040, or an object address such as
        # 0x7f3b2c404d90) are not mistaken for an HTTP status.
        return re.search(rf"\b{code}\b", error) is not None

    if has_status("404") or "not found" in err:
        return "url_invalid"
    if has_status("403") or "forbidden" in err:
        return "forbidden"
    if has_status("429") or "too many request" in err:
        return "rate_limited"
    # "timeout" also covers ConnectTimeout / ConnectionTimeout spellings.
    if "timeout" in err or "timed out" in err:
        return "timeout"
    if "could not resolve" in err or "name or service not known" in err or "nodename nor servname" in err:
        return "dns_failure"
    if "connection refused" in err:
        return "connection_refused"
    if "connection aborted" in err or "remotedisconnected" in err or "remote end closed" in err:
        return "connection_reset"
    if "ssl" in err or "certificate" in err or "certifi" in err:
        return "ssl_error"
    if "max retries" in err or "newconnectionerror" in err:
        return "unreachable"
    if "invalid url" in err or "no host" in err or "missing scheme" in err:
        return "url_malformed"
    # The phrase is the real discriminator here; the digit test is kept as
    # loose as it was so existing classifications do not change.
    if "5" in error and "server error" in err:
        return "server_error"
    return "unknown"
|
||||
|
||||
|
||||
def fetch_feed(url: str, timeout: int = config.FETCH_TIMEOUT) -> dict:
|
||||
"""抓取单个 RSS 源
|
||||
返回 {"success": bool, "feed_data": parsed, "error": str, "response_time_ms": int}
|
||||
@@ -102,12 +135,12 @@ def parse_article(entry, feed_id: int) -> dict:
|
||||
published_at = None
|
||||
if hasattr(entry, "published_parsed") and entry.published_parsed:
|
||||
try:
|
||||
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc)
|
||||
published_at = datetime(*entry.published_parsed[:6])
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
if not published_at and hasattr(entry, "updated_parsed") and entry.updated_parsed:
|
||||
try:
|
||||
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc)
|
||||
published_at = datetime(*entry.updated_parsed[:6])
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
@@ -199,9 +232,10 @@ def fetch_and_store_feed(feed_id: int) -> dict:
|
||||
|
||||
if not result["success"]:
|
||||
# 记录失败
|
||||
feed.last_fetch_at = datetime.now(timezone.utc)
|
||||
feed.last_fetch_at = datetime.utcnow()
|
||||
feed.last_fetch_status = "fail"
|
||||
feed.last_error = result["error"]
|
||||
feed.error_type = classify_error(result["error"])
|
||||
feed.fail_count += 1
|
||||
|
||||
log = FetchLog(
|
||||
@@ -264,9 +298,10 @@ def fetch_and_store_feed(feed_id: int) -> dict:
|
||||
existing.published_at = article_data["published_at"]
|
||||
|
||||
# 更新 feed 统计
|
||||
feed.last_fetch_at = datetime.now(timezone.utc)
|
||||
feed.last_fetch_at = datetime.utcnow()
|
||||
feed.last_fetch_status = "success"
|
||||
feed.last_error = ""
|
||||
feed.error_type = ""
|
||||
feed.success_count += 1
|
||||
feed.article_count += new_count
|
||||
|
||||
|
||||
@@ -65,6 +65,15 @@ def stop_scheduler():
|
||||
_scheduler = None
|
||||
|
||||
|
||||
def get_feed_next_run(feed_id: int):
    """Return the next scheduled run time of a feed's fetch job.

    The job is looked up under the id ``fetch_feed_<feed_id>``. ``None`` is
    returned when the scheduler is not running or when no such job exists.
    """
    scheduler = get_scheduler()
    if scheduler.running:
        job = scheduler.get_job(f"fetch_feed_{feed_id}")
        if job:
            return job.next_run_time
    return None
|
||||
|
||||
|
||||
def init_feed_jobs(db):
|
||||
"""从数据库加载所有活跃 RSS 源并注册定时任务"""
|
||||
from models import Feed
|
||||
|
||||
Reference in New Issue
Block a user