feat: 深色主题UI、错误分类、批量抓取、健康度筛选
- 修复 datetime 时区不一致导致所有 API 500 错误的问题
- Feeds/Dashboard 页面改为深色表格主题,高对比度文字
- 添加错误类型自动分类(URL失效/被拒绝/超时/DNS失败/SSL错误等12种)
- 新增"下次抓取时间"列,从 APScheduler 获取
- 新增健康度筛选下拉,修复分页后过滤失效的 bug
- "全部抓取"改为同步并发执行,基于当前筛选条件获取所有匹配源
- 新增数据库自动迁移机制,处理增量列变更

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+40
-5
@@ -2,7 +2,7 @@
|
||||
import time
|
||||
import re
|
||||
import html
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from urllib.parse import urljoin
|
||||
import requests
|
||||
@@ -14,6 +14,39 @@ from database import SessionLocal
|
||||
import config
|
||||
|
||||
|
||||
def classify_error(error: str) -> str:
    """Classify a fetch error message into a coarse error-type label.

    The checks run in a fixed order and the first match wins, so a message
    that mentions several conditions gets the label of the earliest rule.

    Args:
        error: The error message text. A falsy value (empty string or
            ``None``) means "no error".

    Returns:
        ``""`` when ``error`` is falsy; otherwise one of ``"url_invalid"``,
        ``"forbidden"``, ``"rate_limited"``, ``"timeout"``, ``"dns_failure"``,
        ``"connection_refused"``, ``"connection_reset"``, ``"ssl_error"``,
        ``"unreachable"``, ``"url_malformed"``, ``"server_error"`` or
        ``"unknown"``.
    """
    if not error:
        return ""
    err = error.lower()

    def has_status(code: str) -> bool:
        # Match the status code only as a standalone number: digits that are
        # part of a longer number (a port, an ID, a path such as /feed/14042)
        # must not be mistaken for an HTTP status.
        return re.search(rf"(?<!\d){code}(?!\d)", error) is not None

    if has_status("404") or "not found" in err:
        return "url_invalid"
    if has_status("403") or "forbidden" in err:
        return "forbidden"
    if has_status("429") or "too many request" in err:
        return "rate_limited"
    # "timeout" also covers "connecttimeout" / "connectiontimeout", so no
    # separate check for those names is needed.
    if "timeout" in err or "timed out" in err:
        return "timeout"
    if (
        "could not resolve" in err
        or "name or service not known" in err
        or "nodename nor servname" in err
    ):
        return "dns_failure"
    if "connection refused" in err:
        return "connection_refused"
    if (
        "connection aborted" in err
        or "remotedisconnected" in err
        or "remote end closed" in err
    ):
        return "connection_reset"
    # "certifi" is a prefix of "certificate", so it matches both spellings.
    if "ssl" in err or "certifi" in err:
        return "ssl_error"
    if "max retries" in err or "newconnectionerror" in err:
        return "unreachable"
    if "invalid url" in err or "no host" in err or "missing scheme" in err:
        return "url_malformed"
    # Deliberately loose: any "5" in the message plus the phrase
    # "server error" is treated as a 5xx response.
    if "5" in error and "server error" in err:
        return "server_error"
    return "unknown"
|
||||
|
||||
|
||||
def fetch_feed(url: str, timeout: int = config.FETCH_TIMEOUT) -> dict:
|
||||
"""抓取单个 RSS 源
|
||||
返回 {"success": bool, "feed_data": parsed, "error": str, "response_time_ms": int}
|
||||
@@ -102,12 +135,12 @@ def parse_article(entry, feed_id: int) -> dict:
|
||||
published_at = None
|
||||
if hasattr(entry, "published_parsed") and entry.published_parsed:
|
||||
try:
|
||||
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc)
|
||||
published_at = datetime(*entry.published_parsed[:6])
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
if not published_at and hasattr(entry, "updated_parsed") and entry.updated_parsed:
|
||||
try:
|
||||
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc)
|
||||
published_at = datetime(*entry.updated_parsed[:6])
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
@@ -199,9 +232,10 @@ def fetch_and_store_feed(feed_id: int) -> dict:
|
||||
|
||||
if not result["success"]:
|
||||
# 记录失败
|
||||
feed.last_fetch_at = datetime.now(timezone.utc)
|
||||
feed.last_fetch_at = datetime.utcnow()
|
||||
feed.last_fetch_status = "fail"
|
||||
feed.last_error = result["error"]
|
||||
feed.error_type = classify_error(result["error"])
|
||||
feed.fail_count += 1
|
||||
|
||||
log = FetchLog(
|
||||
@@ -264,9 +298,10 @@ def fetch_and_store_feed(feed_id: int) -> dict:
|
||||
existing.published_at = article_data["published_at"]
|
||||
|
||||
# 更新 feed 统计
|
||||
feed.last_fetch_at = datetime.now(timezone.utc)
|
||||
feed.last_fetch_at = datetime.utcnow()
|
||||
feed.last_fetch_status = "success"
|
||||
feed.last_error = ""
|
||||
feed.error_type = ""
|
||||
feed.success_count += 1
|
||||
feed.article_count += new_count
|
||||
|
||||
|
||||
Reference in New Issue
Block a user