feat: 代理支持、外部API增强、调度器修复、每日文章看板

- 添加 HTTP 代理支持(国内直连、外网走代理)
- 外部 API 新增全文搜索、源健康度/错误筛选、未读筛选
- 修复 APScheduler 线程静默崩溃(_safe_fetch 异常保护)
- 健康检查暴露调度器状态
- Dashboard 新增每日文章数柱状图(按 published_at)
- 文章列表 API 补上 content 字段,日期筛选修复时间范围
- 修复外部 API 双重 external 前缀
- User-Agent 改为 Chrome 标识缓解 403
- 添加完整 API 接口文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
congsh
2026-06-12 09:58:32 +08:00
parent 68bba3d9e0
commit 4286731348
12 changed files with 1057 additions and 44 deletions
+33 -4
View File
@@ -13,6 +13,35 @@ from models import Feed, Article, FetchLog
from database import SessionLocal
import config
# Domestic (mainland China) domain suffixes / keywords.
# Hosts matching one of these entries are reached directly; every other
# host goes through the configured proxy.
CN_DOMAINS = (
    ".cn",
    ".com.cn",
    ".org.cn",
    ".net.cn",
    "36kr.com",
    "zhihu.com",
    "weibo.com",
    "douban.com",
    "bilibili.com",
    "tmtpost.com",
    "ifanr.com",
    "geekpark.net",
    "pingwest.com",
    "juejin.cn",
    "segmentfault.com",
    "cnblogs.com",
    "csdn.net",
    "qq.com",
    "163.com",
    "sohu.com",
    "sina.com.cn",
    "baidu.com",
    "taobao.com",
    "jd.com",
    "aliyun.com",
    "xinhuanet.com",
    "people.com.cn",
    "sciencenet.cn",
    # Loopback and private-network entries.
    "localhost",
    "127.0.0.1",
    "192.168.",
)
def _get_proxies(url: str) -> dict:
    """Return the ``proxies`` mapping to pass to ``requests`` for *url*.

    An empty dict is returned (no explicit proxy) when no HTTPS proxy is
    configured or when the URL's host matches an entry in ``CN_DOMAINS``.
    Otherwise the configured proxies are returned, with the HTTP proxy
    falling back to the HTTPS proxy when it is unset.

    Entries in ``CN_DOMAINS`` are interpreted by their shape:

    * ending with ``"."`` (e.g. ``"192.168."``) -- address prefix;
    * starting with ``"."`` (e.g. ``".cn"``) -- domain suffix;
    * anything else (e.g. ``"qq.com"``) -- that exact host or any of its
      subdomains.

    Args:
        url: The URL that is about to be requested.

    Returns:
        A dict with ``"http"`` and ``"https"`` keys, or ``{}`` for a
        direct connection.
    """
    if not config.HTTPS_PROXY:
        return {}

    from urllib.parse import urlparse

    host = urlparse(url).hostname or ""

    for entry in CN_DOMAINS:
        if entry.endswith("."):
            # Prefix pattern: a suffix test can never match "192.168.x.y".
            is_direct = host.startswith(entry)
        elif entry.startswith("."):
            is_direct = host.endswith(entry)
        else:
            # Require a label boundary so "notqq.com" does not match "qq.com".
            is_direct = host == entry or host.endswith("." + entry)
        if is_direct:
            return {}

    # Everything else goes through the proxy.
    return {
        "http": config.HTTP_PROXY or config.HTTPS_PROXY,
        "https": config.HTTPS_PROXY,
    }
def classify_error(error: str) -> str:
"""根据错误信息分类错误类型"""
@@ -54,10 +83,10 @@ def fetch_feed(url: str, timeout: int = config.FETCH_TIMEOUT) -> dict:
start_time = time.time()
try:
headers = {
"User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36",
"Accept": "application/rss+xml, application/atom+xml, application/xml, text/xml, */*",
}
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, proxies=_get_proxies(url))
response.raise_for_status()
# 解析 RSS
@@ -87,9 +116,9 @@ def discover_feed_url(url: str, timeout: int = 15) -> list:
"""
try:
headers = {
"User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36",
}
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, proxies=_get_proxies(url))
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")