fix: 端口更换 & 代码审核修复

端口:
- 服务端口 8000 → 7329
- 前端开发端口 5173 → 7330

安全:
- CORS 收紧为白名单,关闭 credentials
- SPA 路由白名单完善
- 前端 XSS 转义

可靠性:
- 时区统一为 datetime.now(timezone.utc)
- 文章入库改为内存去重 + 增量计数
- OPML 导入改为 body 参数接收
- OPML 导出 URL XML 转义
- 首次抓取改为 BackgroundTasks 异步
- articles.py 中 HTTPException 的 import 移到文件顶部
- FTS5 异常显式日志
- FTS5 查询加引号包裹防布尔注入
- 中文摘要支持中文标点
- 去掉未使用的 hashlib import

部署:
- Dockerfile 锁 python:3.12.7-slim
- requirements 锁定具体版本
- healthcheck 不用 curl(镜像里没有)
- docker-compose 使用 .env 文件
- 新增 .env 配置文件
This commit is contained in:
congsh
2026-06-11 14:31:29 +08:00
parent 54e7db0ef0
commit c59dd304f7
17 changed files with 701 additions and 106 deletions
+7 -3
View File
@@ -43,15 +43,19 @@ def init_fts5():
conn = engine.raw_connection()
cursor = conn.cursor()
import logging
logger = logging.getLogger(__name__)
# 检查 FTS5 扩展是否可用
try:
cursor.execute("SELECT sqlite_compileoption_used('ENABLE_FTS5')")
has_fts5 = cursor.fetchone()[0]
if not has_fts5:
print("警告: SQLite 未启用 FTS5 扩展,全文搜索将不可用")
logger.warning("SQLite 未启用 FTS5 扩展,全文搜索将不可用")
return
except Exception:
pass
except Exception as e:
logger.error(f"FTS5 检测失败: {e}")
return
# 创建 FTS5 虚拟表
cursor.execute("""
+6 -1
View File
@@ -10,8 +10,13 @@ def search_articles(query: str, limit: int = 50, offset: int = 0):
if not query or not query.strip():
return [], 0
# 转义 FTS5 特殊字符
# 转义 FTS5 特殊字符(双引号、* 等)
# 简单策略:将用户查询视为一个整体短语,加引号包裹
query = query.replace('"', '""').strip()
if not query:
return [], 0
# 用双引号包裹,避免 FTS5 布尔操作符被误解析
query = f'"{query}"'
conn = engine.raw_connection()
cursor = conn.cursor()
+11 -9
View File
@@ -1,6 +1,7 @@
"""RSS 源健康度检测"""
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from typing import List, Dict
from sqlalchemy import func
from sqlalchemy.orm import Session
from models import Feed, FetchLog
@@ -9,6 +10,7 @@ def get_feed_health(db: Session, feed_id: int = None) -> List[Dict]:
"""获取 RSS 源健康度信息
返回每个源的健康状态详情
"""
now = datetime.now(timezone.utc)
query = db.query(Feed)
if feed_id:
query = query.filter(Feed.id == feed_id)
@@ -22,15 +24,16 @@ def get_feed_health(db: Session, feed_id: int = None) -> List[Dict]:
days_since_fetch = None
if feed.last_fetch_at:
days_since_fetch = (datetime.utcnow() - feed.last_fetch_at).days
days_since_fetch = (now - feed.last_fetch_at).days
# 获取最近 7 天抓取记录
week_ago = now - timedelta(days=7)
recent_logs = db.query(FetchLog).filter(
FetchLog.feed_id == feed.id,
FetchLog.created_at >= datetime.utcnow() - timedelta(days=7)
FetchLog.created_at >= week_ago
).order_by(FetchLog.created_at.desc()).limit(10).all()
health = feed.health_status()
health = feed.health_status(now=now)
results.append({
"id": feed.id,
@@ -76,14 +79,14 @@ def get_overall_stats(db: Session) -> Dict:
"""获取整体统计信息"""
total_feeds = db.query(Feed).count()
active_feeds = db.query(Feed).filter(Feed.is_active == True).count()
total_articles = db.query(Feed).with_entities(Feed.article_count).all()
total_articles_count = sum(a[0] for a in total_articles) if total_articles else 0
total_articles_count = db.query(func.sum(Feed.article_count)).scalar() or 0
# 健康源统计
feeds = db.query(Feed).all()
healthy = warning = unhealthy = 0
now = datetime.now(timezone.utc)
for feed in feeds:
status = feed.health_status()
status = feed.health_status(now=now)
if status == "healthy":
healthy += 1
elif status == "warning":
@@ -92,8 +95,7 @@ def get_overall_stats(db: Session) -> Dict:
unhealthy += 1
# 今日抓取
today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
from models import FetchLog
today = now.replace(hour=0, minute=0, second=0, microsecond=0)
today_fetches = db.query(FetchLog).filter(FetchLog.created_at >= today).count()
today_success = db.query(FetchLog).filter(
FetchLog.created_at >= today, FetchLog.status == "success"
+17 -7
View File
@@ -35,13 +35,17 @@ app = FastAPI(
lifespan=lifespan,
)
# CORS
# CORS — 仅允许同源和开发环境
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
allow_origins=[
"http://localhost:7329",
"http://localhost:7330",
"http://127.0.0.1:7329",
],
allow_credentials=False,
allow_methods=["GET", "POST", "PUT", "DELETE"],
allow_headers=["Content-Type", "Authorization", "X-API-Key"],
)
# API 路由
@@ -62,11 +66,17 @@ static_dir = os.path.join(config.BASE_DIR, "static")
if os.path.exists(static_dir):
app.mount("/static", StaticFiles(directory=static_dir), name="static")
# API 路径白名单 — 这些路径不应被 SPA 兜底
_API_PATHS = {
"api", "docs", "openapi.json", "redoc",
}
@app.get("/{full_path:path}")
async def serve_spa(full_path: str):
"""Vue SPA 路由回退"""
# API 路由不走这里
if full_path.startswith("api/") or full_path.startswith("docs") or full_path.startswith("openapi.json"):
# API/文档路由不走 SPA 兜底
first_seg = full_path.split("/")[0] if full_path else ""
if first_seg in _API_PATHS:
return {"detail": "Not found"}
index_path = os.path.join(static_dir, "index.html")
+9 -6
View File
@@ -1,5 +1,5 @@
"""SQLAlchemy 数据模型"""
from datetime import datetime
from datetime import datetime, timezone
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, ForeignKey
from sqlalchemy.orm import relationship
from database import Base
@@ -25,13 +25,13 @@ class Feed(Base):
fail_count = Column(Integer, default=0)
article_count = Column(Integer, default=0)
created_at = Column(DateTime, default=datetime.utcnow)
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
# 关联
articles = relationship("Article", back_populates="feed", cascade="all, delete-orphan")
fetch_logs = relationship("FetchLog", back_populates="feed", cascade="all, delete-orphan")
def health_status(self):
def health_status(self, now: datetime = None):
"""计算健康度
🟢 健康: 成功率 >= 90%, 最近7天有更新
🟡 警告: 成功率 50%-90%, 或超过3天未更新
@@ -43,9 +43,12 @@ class Feed(Base):
success_rate = self.success_count / total
if now is None:
now = datetime.now(timezone.utc)
days_since_last_fetch = None
if self.last_fetch_at:
days_since_last_fetch = (datetime.utcnow() - self.last_fetch_at).days
days_since_last_fetch = (now - self.last_fetch_at).days
if success_rate >= 0.9 and (days_since_last_fetch is None or days_since_last_fetch <= 7):
return "healthy"
@@ -68,7 +71,7 @@ class Article(Base):
content = Column(Text, default="")
summary = Column(Text, default="")
is_read = Column(Boolean, default=False)
created_at = Column(DateTime, default=datetime.utcnow, index=True)
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
# 关联
feed = relationship("Feed", back_populates="articles")
@@ -84,7 +87,7 @@ class FetchLog(Base):
articles_fetched = Column(Integer, default=0)
error_message = Column(Text, default="")
response_time_ms = Column(Integer, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow, index=True)
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
# 关联
feed = relationship("Feed", back_populates="fetch_logs")
+9 -9
View File
@@ -1,9 +1,9 @@
fastapi>=0.110.0
uvicorn[standard]>=0.29.0
sqlalchemy>=2.0.0
pydantic>=2.6.0
feedparser>=6.0.11
requests>=2.31.0
beautifulsoup4>=4.12.0
apscheduler>=3.10.4
lxml>=5.1.0
fastapi==0.115.0
uvicorn[standard]==0.32.0
sqlalchemy==2.0.36
pydantic==2.9.2
feedparser==6.0.11
requests==2.32.3
beautifulsoup4==4.12.3
apscheduler==3.10.4
lxml==5.3.0
+1 -2
View File
@@ -1,6 +1,6 @@
"""文章管理 API"""
from typing import Optional
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.orm import Session
from sqlalchemy import desc
@@ -130,4 +130,3 @@ def fulltext_search(
return {"total": total, "items": results}
from fastapi import HTTPException
+3 -3
View File
@@ -1,6 +1,6 @@
"""对外 API(供 AI/外部系统调用)"""
from typing import Optional
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from sqlalchemy import desc
@@ -21,7 +21,7 @@ def get_recent_articles(
"""获取最近 N 小时的文章
这是对外提供给 AI 分析的主要接口
"""
since = datetime.utcnow() - timedelta(hours=hours)
since = datetime.now(timezone.utc) - timedelta(hours=hours)
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
@@ -136,7 +136,7 @@ def get_daily_summary(
except ValueError:
return {"error": "Invalid date format, use YYYY-MM-DD"}
else:
day = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
day = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
next_day = day + timedelta(days=1)
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
+27 -9
View File
@@ -1,6 +1,6 @@
"""RSS 源管理 API"""
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
from pydantic import BaseModel, HttpUrl
from sqlalchemy.orm import Session
from database import get_db
@@ -103,7 +103,11 @@ def list_categories(db: Session = Depends(get_db)):
@router.post("", response_model=dict)
def create_feed(data: FeedCreate, db: Session = Depends(get_db)):
def create_feed(
data: FeedCreate,
background_tasks: BackgroundTasks,
db: Session = Depends(get_db),
):
"""添加 RSS 源"""
# 检查是否已存在
existing = db.query(Feed).filter(Feed.url == data.url).first()
@@ -126,10 +130,10 @@ def create_feed(data: FeedCreate, db: Session = Depends(get_db)):
if feed.is_active:
add_feed_job(feed.id, feed.fetch_interval_minutes)
# 立即抓取一次
fetch_and_store_feed(feed.id)
# 后台异步首次抓取,不阻塞 HTTP 响应
background_tasks.add_task(fetch_and_store_feed, feed.id)
return {"id": feed.id, "message": "RSS 源添加成功", "url": feed.url}
return {"id": feed.id, "message": "RSS 源添加成功,正在后台抓取", "url": feed.url}
@router.post("/discover")
@@ -217,13 +221,25 @@ def trigger_fetch(feed_id: int, db: Session = Depends(get_db)):
return result
class OpmlImport(BaseModel):
opml_content: str
@router.post("/import-opml")
def import_opml(opml_content: str, db: Session = Depends(get_db)):
def import_opml(data: OpmlImport, db: Session = Depends(get_db)):
"""导入 OPML 文件内容"""
import xml.etree.ElementTree as ET
content = data.opml_content.strip()
if not content:
raise HTTPException(status_code=400, detail="OPML 内容不能为空")
# 限制大小(防止滥用)
if len(content) > 5_000_000: # 5MB
raise HTTPException(status_code=413, detail="OPML 文件过大")
try:
root = ET.fromstring(opml_content)
root = ET.fromstring(content)
except ET.ParseError:
raise HTTPException(status_code=400, detail="无效的 OPML 文件")
@@ -261,12 +277,14 @@ def import_opml(opml_content: str, db: Session = Depends(get_db)):
@router.get("/export-opml")
def export_opml(db: Session = Depends(get_db)):
"""导出 OPML 文件内容"""
from xml.sax.saxutils import escape
feeds = db.query(Feed).all()
lines = ['<?xml version="1.0" encoding="UTF-8"?>', '<opml version="2.0">', '<head><title>rssKeeper Feeds</title></head>', '<body>']
for feed in feeds:
title = (feed.title or feed.url).replace('"', '&quot;')
lines.append(f' <outline type="rss" text="{title}" xmlUrl="{feed.url}" />')
title = escape(feed.title or feed.url, {'"': '&quot;'})
url = escape(feed.url)
lines.append(f' <outline type="rss" text="{title}" xmlUrl="{url}" />')
lines.append('</body>')
lines.append('</opml>')
+49 -26
View File
@@ -2,7 +2,6 @@
import time
import re
import html
import hashlib
from datetime import datetime, timezone
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urljoin
@@ -99,16 +98,16 @@ def parse_article(entry, feed_id: int) -> dict:
link = entry.get("link", "")
author = entry.get("author", "")
# 发布时间
# 发布时间 — 统一存为 UTC aware datetime
published_at = None
if hasattr(entry, "published_parsed") and entry.published_parsed:
try:
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc).replace(tzinfo=None)
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc)
except (ValueError, TypeError):
pass
if not published_at and hasattr(entry, "updated_parsed") and entry.updated_parsed:
try:
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc).replace(tzinfo=None)
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc)
except (ValueError, TypeError):
pass
@@ -172,9 +171,14 @@ def generate_summary(content: str, max_length: int = 300) -> str:
if len(text) <= max_length:
return text
# 在句子边界截断
# 在句子边界截断(支持中英文标点)
truncated = text[:max_length]
last_period = max(truncated.rfind("。"), truncated.rfind(". "), truncated.rfind("! "), truncated.rfind("? "))
last_period = max(
truncated.rfind("。"), truncated.rfind(". "),
truncated.rfind("! "), truncated.rfind("? "),
truncated.rfind("!"), truncated.rfind("?"),
truncated.rfind(""),
)
if last_period > max_length * 0.5:
return truncated[:last_period + 1]
@@ -195,7 +199,7 @@ def fetch_and_store_feed(feed_id: int) -> dict:
if not result["success"]:
# 记录失败
feed.last_fetch_at = datetime.utcnow()
feed.last_fetch_at = datetime.now(timezone.utc)
feed.last_fetch_status = "fail"
feed.last_error = result["error"]
feed.fail_count += 1
@@ -218,34 +222,53 @@ def fetch_and_store_feed(feed_id: int) -> dict:
if hasattr(parsed.feed, "description"):
feed.description = parsed.feed.description[:1000]
# 存储文章
new_count = 0
# 存储文章 — 先收集所有文章,内存去重后批量入库
seen_links = set()
articles_to_add = []
articles_to_update = []
for entry in parsed.entries:
article_data = parse_article(entry, feed_id)
if not article_data["link"]:
link = article_data.get("link", "")
if not link or link in seen_links:
continue
seen_links.add(link)
articles_to_add.append(article_data)
# 检查是否已存在(基于 link)
existing = db.query(Article).filter(Article.link == article_data["link"]).first()
if existing:
# 更新已有文章
existing.title = article_data["title"] or existing.title
existing.content = article_data["content"] or existing.content
existing.summary = article_data["summary"] or existing.summary
existing.author = article_data["author"] or existing.author
if article_data["published_at"]:
existing.published_at = article_data["published_at"]
else:
article = Article(**article_data)
db.add(article)
new_count += 1
# 批量查询已有文章
if articles_to_add:
existing_links = {
row[0] for row in db.query(Article.link).filter(
Article.link.in_([a["link"] for a in articles_to_add])
).all()
}
new_count = 0
for article_data in articles_to_add:
if article_data["link"] in existing_links:
articles_to_update.append(article_data)
else:
article = Article(**article_data)
db.add(article)
new_count += 1
# 更新已有文章
for article_data in articles_to_update:
existing = db.query(Article).filter(Article.link == article_data["link"]).first()
if existing:
existing.title = article_data["title"] or existing.title
existing.content = article_data["content"] or existing.content
existing.summary = article_data["summary"] or existing.summary
existing.author = article_data["author"] or existing.author
if article_data["published_at"]:
existing.published_at = article_data["published_at"]
# 更新 feed 统计
feed.last_fetch_at = datetime.utcnow()
feed.last_fetch_at = datetime.now(timezone.utc)
feed.last_fetch_status = "success"
feed.last_error = ""
feed.success_count += 1
feed.article_count = db.query(Article).filter(Article.feed_id == feed_id).count()
feed.article_count += new_count
log = FetchLog(
feed_id=feed_id,