feat: 代理支持、外部API增强、调度器修复、每日文章看板

- 添加 HTTP 代理支持（国内直连、外网走代理）
- 外部 API 新增全文搜索、源健康度/错误筛选、未读筛选
- 修复 APScheduler 线程静默崩溃（_safe_fetch 异常保护）
- 健康检查暴露调度器状态
- Dashboard 新增每日文章数柱状图（按 published_at）
- 文章列表 API 补上 content 字段，日期筛选修复时间范围
- 修复外部 API 双重 external 前缀
- User-Agent 改为 Chrome 标识缓解 403
- 添加完整 API 接口文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-12 09:58:32 +08:00
parent 68bba3d9e0
commit 4286731348
12 changed files with 1057 additions and 44 deletions
@@ -13,6 +13,35 @@ from models import Feed, Article, FetchLog
 from database import SessionLocal
 import config

+# Domestic (mainland China) domain suffixes / keywords: hosts matching one of
+# these entries connect directly; every other host goes through the proxy.
+CN_DOMAINS = (
+    # Suffixes with a leading dot.
+    ".cn", ".com.cn", ".org.cn", ".net.cn",
+    # Individual sites, listed by domain name.
+    "36kr.com", "zhihu.com", "weibo.com", "douban.com", "bilibili.com",
+    "tmtpost.com", "ifanr.com", "geekpark.net", "pingwest.com",
+    "juejin.cn", "segmentfault.com", "cnblogs.com", "csdn.net",
+    "qq.com", "163.com", "sohu.com", "sina.com.cn", "baidu.com",
+    "taobao.com", "jd.com", "aliyun.com",
+    "xinhuanet.com", "people.com.cn", "sciencenet.cn",
+    # Local targets. Note that "192.168." is an IPv4 address *prefix*,
+    # not a domain suffix like the entries above.
+    "localhost", "127.0.0.1", "192.168.",
+)
+
+
+def _get_proxies(url: str) -> dict:
+    """Return the ``proxies`` mapping to pass to ``requests`` for *url*.
+
+    Hosts listed in ``CN_DOMAINS`` are fetched directly (empty mapping);
+    every other host is routed through the configured proxy.
+
+    Args:
+        url: The URL about to be requested.
+
+    Returns:
+        ``{}`` when no HTTPS proxy is configured or the host is domestic,
+        otherwise a dict with ``"http"`` and ``"https"`` proxy URLs.
+    """
+    if not config.HTTPS_PROXY:
+        return {}
+    from urllib.parse import urlparse
+    # ``hostname`` is already lower-cased; drop a trailing root dot so
+    # "example.cn." is treated the same as "example.cn".
+    host = (urlparse(url).hostname or "").rstrip(".")
+    for entry in CN_DOMAINS:
+        if entry.endswith("."):
+            # Address prefix such as "192.168." (a suffix test never matches).
+            is_domestic = host.startswith(entry)
+        elif entry.startswith("."):
+            # Pure suffix such as ".cn".
+            is_domestic = host.endswith(entry)
+        else:
+            # Exact host or a subdomain of it; require a label boundary so
+            # that "notqq.com" is not mistaken for "qq.com".
+            is_domestic = host == entry or host.endswith("." + entry)
+        if is_domestic:
+            # NOTE(review): with an empty mapping, requests may still pick up
+            # proxy settings from environment variables; confirm this is the
+            # intended meaning of "direct".
+            return {}
+    # Foreign host: plain HTTP falls back to the HTTPS proxy when no
+    # dedicated HTTP proxy is configured.
+    return {
+        "http": config.HTTP_PROXY or config.HTTPS_PROXY,
+        "https": config.HTTPS_PROXY,
+    }
+

 def classify_error(error: str) -> str:
    """根据错误信息分类错误类型"""
@@ -54,10 +83,10 @@ def fetch_feed(url: str, timeout: int = config.FETCH_TIMEOUT) -> dict:
    start_time = time.time()
    try:
        headers = {
-            "User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36",
            "Accept": "application/rss+xml, application/atom+xml, application/xml, text/xml, */*",
        }
-        response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
+        response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, proxies=_get_proxies(url))
        response.raise_for_status()

        # 解析 RSS
@@ -87,9 +116,9 @@ def discover_feed_url(url: str, timeout: int = 15) -> list:
    """
    try:
        headers = {
-            "User-Agent": "rssKeeper/1.0 (+https://github.com/rssKeeper)",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0 Safari/537.36",
        }
-        response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True)
+        response = requests.get(url, headers=headers, timeout=timeout, allow_redirects=True, proxies=_get_proxies(url))
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")