feat: 代理支持、外部API增强、调度器修复、每日文章看板
- 添加 HTTP 代理支持(国内直连、外网走代理)
- 外部 API 新增全文搜索、源健康度/错误筛选、未读筛选
- 修复 APScheduler 线程静默崩溃(_safe_fetch 异常保护)
- 健康检查暴露调度器状态
- Dashboard 新增每日文章数柱状图(按 published_at)
- 文章列表 API 补上 content 字段,日期筛选修复时间范围
- 修复外部 API 双重 external 前缀
- User-Agent 改为 Chrome 标识缓解 403
- 添加完整 API 接口文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+33
-4
@@ -13,6 +13,35 @@ from models import Feed, Article, FetchLog
|
||||
from database import SessionLocal
|
||||
import config
|
||||
|
||||
# Domestic (mainland China) domain suffixes / keywords.
# Hosts matching one of these entries connect directly; every other host
# is routed through the configured proxy.
CN_DOMAINS = (
    ".cn",
    ".com.cn",
    ".org.cn",
    ".net.cn",
    "36kr.com",
    "zhihu.com",
    "weibo.com",
    "douban.com",
    "bilibili.com",
    "tmtpost.com",
    "ifanr.com",
    "geekpark.net",
    "pingwest.com",
    "juejin.cn",
    "segmentfault.com",
    "cnblogs.com",
    "csdn.net",
    "qq.com",
    "163.com",
    "sohu.com",
    "sina.com.cn",
    "baidu.com",
    "taobao.com",
    "jd.com",
    "aliyun.com",
    "xinhuanet.com",
    "people.com.cn",
    "sciencenet.cn",
    "localhost",
    "127.0.0.1",
    "192.168.",
)
|
||||
|
||||
|
||||
def _get_proxies(url: str) -> dict:
    """Return the ``proxies`` mapping to pass to ``requests`` for *url*.

    An empty dict is returned when ``config.HTTPS_PROXY`` is not set, or
    when the URL's host matches an entry of ``CN_DOMAINS`` (direct
    connection). Otherwise the configured proxy is returned for both
    schemes, with ``config.HTTP_PROXY`` falling back to
    ``config.HTTPS_PROXY`` for plain HTTP.

    Each ``CN_DOMAINS`` entry is matched according to its shape:

    * entries ending with ``"."`` (e.g. ``"192.168."``) are address
      prefixes and match hosts that start with them;
    * entries starting with ``"."`` (e.g. ``".cn"``) are suffixes;
    * any other entry matches the host itself or one of its subdomains,
      always on a label boundary, so ``"qq.com"`` matches
      ``"mail.qq.com"`` but not ``"notqq.com"``.
    """
    if not config.HTTPS_PROXY:
        return {}

    from urllib.parse import urlparse

    # ``hostname`` is None for URLs without a network location.
    host = urlparse(url).hostname or ""

    def _is_direct(entry: str) -> bool:
        """Tell whether *host* is covered by one direct-connect entry."""
        if entry.endswith("."):
            # Prefix entry such as "192.168.": a suffix test could never
            # match a real address like "192.168.1.10".
            return host.startswith(entry)
        if entry.startswith("."):
            # The leading dot already guarantees a label boundary.
            return host.endswith(entry)
        # Bare domain: require an exact match or a true subdomain so that
        # look-alike hosts ("notqq.com") are not treated as domestic.
        return host == entry or host.endswith("." + entry)

    # Domestic hosts connect directly.
    if any(_is_direct(entry) for entry in CN_DOMAINS):
        return {}

    # Everything else goes through the proxy.
    return {
        "http": config.HTTP_PROXY or config.HTTPS_PROXY,
        "https": config.HTTPS_PROXY,
    }
|
||||
|
||||
|
||||
def classify_error(error: str) -> str:
|
||||
"""根据错误信息分类错误类型"""
|
||||
@@ -54,10 +83,10 @@ def fetch_feed(url: str, timeout: int = config.FETCH_TIMEOUT) -> dict:
|
||||
start_time = time.time()
|
||||
try:
|
||||
headers = {
|
||||
"User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36",
|
||||
"Accept": "application/rss+xml, application/atom+xml, application/xml, text/xml, */*",
|
||||
}
|
||||
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
|
||||
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, proxies=_get_proxies(url))
|
||||
response.raise_for_status()
|
||||
|
||||
# 解析 RSS
|
||||
@@ -87,9 +116,9 @@ def discover_feed_url(url: str, timeout: int = 15) -> list:
|
||||
"""
|
||||
try:
|
||||
headers = {
|
||||
"User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36",
|
||||
}
|
||||
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
|
||||
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, proxies=_get_proxies(url))
|
||||
response.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
|
||||
Reference in New Issue
Block a user