fix: 端口更换 & 代码审核修复
端口:
- 服务端口 8000 → 7329
- 前端开发端口 5173 → 7330

安全:
- CORS 收紧为白名单,关闭 credentials
- SPA 路由白名单完善
- 前端 XSS 转义

可靠性:
- 时区统一为 datetime.now(timezone.utc)
- 文章入库改为内存去重 + 增量计数
- OPML 导入改为 body 参数接收
- OPML 导出 URL XML 转义
- 首次抓取改为 BackgroundTasks 异步
- articles.py HTTPException 移到顶部 import
- FTS5 异常显式日志
- FTS5 查询加引号包裹防布尔注入
- 中文摘要支持中文标点
- 去掉未使用的 hashlib import

部署:
- Dockerfile 锁 python:3.12.7-slim
- requirements 锁定具体版本
- healthcheck 不用 curl(镜像里没有)
- docker-compose 使用 .env 文件
- 新增 .env 配置文件
This commit is contained in:
+7
-3
@@ -43,15 +43,19 @@ def init_fts5():
|
||||
conn = engine.raw_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 检查 FTS5 扩展是否可用
|
||||
try:
|
||||
cursor.execute("SELECT sqlite_compileoption_used('ENABLE_FTS5')")
|
||||
has_fts5 = cursor.fetchone()[0]
|
||||
if not has_fts5:
|
||||
print("警告: SQLite 未启用 FTS5 扩展,全文搜索将不可用")
|
||||
logger.warning("SQLite 未启用 FTS5 扩展,全文搜索将不可用")
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.error(f"FTS5 检测失败: {e}")
|
||||
return
|
||||
|
||||
# 创建 FTS5 虚拟表
|
||||
cursor.execute("""
|
||||
|
||||
@@ -10,8 +10,13 @@ def search_articles(query: str, limit: int = 50, offset: int = 0):
|
||||
if not query or not query.strip():
|
||||
return [], 0
|
||||
|
||||
# 转义 FTS5 特殊字符
|
||||
# 转义 FTS5 特殊字符(双引号、* 等)
|
||||
# 简单策略:将用户查询视为一个整体短语,加引号包裹
|
||||
query = query.replace('"', '""').strip()
|
||||
if not query:
|
||||
return [], 0
|
||||
# 用双引号包裹,避免 FTS5 布尔操作符被误解析
|
||||
query = f'"{query}"'
|
||||
|
||||
conn = engine.raw_connection()
|
||||
cursor = conn.cursor()
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""RSS 源健康度检测"""
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import List, Dict
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
from models import Feed, FetchLog
|
||||
|
||||
@@ -9,6 +10,7 @@ def get_feed_health(db: Session, feed_id: int = None) -> List[Dict]:
|
||||
"""获取 RSS 源健康度信息
|
||||
返回每个源的健康状态详情
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
query = db.query(Feed)
|
||||
if feed_id:
|
||||
query = query.filter(Feed.id == feed_id)
|
||||
@@ -22,15 +24,16 @@ def get_feed_health(db: Session, feed_id: int = None) -> List[Dict]:
|
||||
|
||||
days_since_fetch = None
|
||||
if feed.last_fetch_at:
|
||||
days_since_fetch = (datetime.utcnow() - feed.last_fetch_at).days
|
||||
days_since_fetch = (now - feed.last_fetch_at).days
|
||||
|
||||
# 获取最近 7 天抓取记录
|
||||
week_ago = now - timedelta(days=7)
|
||||
recent_logs = db.query(FetchLog).filter(
|
||||
FetchLog.feed_id == feed.id,
|
||||
FetchLog.created_at >= datetime.utcnow() - timedelta(days=7)
|
||||
FetchLog.created_at >= week_ago
|
||||
).order_by(FetchLog.created_at.desc()).limit(10).all()
|
||||
|
||||
health = feed.health_status()
|
||||
health = feed.health_status(now=now)
|
||||
|
||||
results.append({
|
||||
"id": feed.id,
|
||||
@@ -76,14 +79,14 @@ def get_overall_stats(db: Session) -> Dict:
|
||||
"""获取整体统计信息"""
|
||||
total_feeds = db.query(Feed).count()
|
||||
active_feeds = db.query(Feed).filter(Feed.is_active == True).count()
|
||||
total_articles = db.query(Feed).with_entities(Feed.article_count).all()
|
||||
total_articles_count = sum(a[0] for a in total_articles) if total_articles else 0
|
||||
total_articles_count = db.query(func.sum(Feed.article_count)).scalar() or 0
|
||||
|
||||
# 健康源统计
|
||||
feeds = db.query(Feed).all()
|
||||
healthy = warning = unhealthy = 0
|
||||
now = datetime.now(timezone.utc)
|
||||
for feed in feeds:
|
||||
status = feed.health_status()
|
||||
status = feed.health_status(now=now)
|
||||
if status == "healthy":
|
||||
healthy += 1
|
||||
elif status == "warning":
|
||||
@@ -92,8 +95,7 @@ def get_overall_stats(db: Session) -> Dict:
|
||||
unhealthy += 1
|
||||
|
||||
# 今日抓取
|
||||
today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
from models import FetchLog
|
||||
today = now.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
today_fetches = db.query(FetchLog).filter(FetchLog.created_at >= today).count()
|
||||
today_success = db.query(FetchLog).filter(
|
||||
FetchLog.created_at >= today, FetchLog.status == "success"
|
||||
|
||||
+17
-7
@@ -35,13 +35,17 @@ app = FastAPI(
|
||||
lifespan=lifespan,
|
||||
)
|
||||
|
||||
# CORS
|
||||
# CORS — 仅允许同源和开发环境
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
allow_origins=[
|
||||
"http://localhost:7329",
|
||||
"http://localhost:7330",
|
||||
"http://127.0.0.1:7329",
|
||||
],
|
||||
allow_credentials=False,
|
||||
allow_methods=["GET", "POST", "PUT", "DELETE"],
|
||||
allow_headers=["Content-Type", "Authorization", "X-API-Key"],
|
||||
)
|
||||
|
||||
# API 路由
|
||||
@@ -62,11 +66,17 @@ static_dir = os.path.join(config.BASE_DIR, "static")
|
||||
if os.path.exists(static_dir):
|
||||
app.mount("/static", StaticFiles(directory=static_dir), name="static")
|
||||
|
||||
# API 路径白名单 — 这些路径不应被 SPA 兜底
|
||||
_API_PATHS = {
|
||||
"api", "docs", "openapi.json", "redoc",
|
||||
}
|
||||
|
||||
@app.get("/{full_path:path}")
|
||||
async def serve_spa(full_path: str):
|
||||
"""Vue SPA 路由回退"""
|
||||
# API 路由不走这里
|
||||
if full_path.startswith("api/") or full_path.startswith("docs") or full_path.startswith("openapi.json"):
|
||||
# API/文档路由不走 SPA 兜底
|
||||
first_seg = full_path.split("/")[0] if full_path else ""
|
||||
if first_seg in _API_PATHS:
|
||||
return {"detail": "Not found"}
|
||||
|
||||
index_path = os.path.join(static_dir, "index.html")
|
||||
|
||||
+9
-6
@@ -1,5 +1,5 @@
|
||||
"""SQLAlchemy 数据模型"""
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
from sqlalchemy import Column, Integer, String, Text, Boolean, DateTime, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
from database import Base
|
||||
@@ -25,13 +25,13 @@ class Feed(Base):
|
||||
fail_count = Column(Integer, default=0)
|
||||
article_count = Column(Integer, default=0)
|
||||
|
||||
created_at = Column(DateTime, default=datetime.utcnow)
|
||||
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))
|
||||
|
||||
# 关联
|
||||
articles = relationship("Article", back_populates="feed", cascade="all, delete-orphan")
|
||||
fetch_logs = relationship("FetchLog", back_populates="feed", cascade="all, delete-orphan")
|
||||
|
||||
def health_status(self):
|
||||
def health_status(self, now: datetime = None):
|
||||
"""计算健康度
|
||||
🟢 健康: 成功率 >= 90%, 最近7天有更新
|
||||
🟡 警告: 成功率 50%-90%, 或超过3天未更新
|
||||
@@ -43,9 +43,12 @@ class Feed(Base):
|
||||
|
||||
success_rate = self.success_count / total
|
||||
|
||||
if now is None:
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
days_since_last_fetch = None
|
||||
if self.last_fetch_at:
|
||||
days_since_last_fetch = (datetime.utcnow() - self.last_fetch_at).days
|
||||
days_since_last_fetch = (now - self.last_fetch_at).days
|
||||
|
||||
if success_rate >= 0.9 and (days_since_last_fetch is None or days_since_last_fetch <= 7):
|
||||
return "healthy"
|
||||
@@ -68,7 +71,7 @@ class Article(Base):
|
||||
content = Column(Text, default="")
|
||||
summary = Column(Text, default="")
|
||||
is_read = Column(Boolean, default=False)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, index=True)
|
||||
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
|
||||
|
||||
# 关联
|
||||
feed = relationship("Feed", back_populates="articles")
|
||||
@@ -84,7 +87,7 @@ class FetchLog(Base):
|
||||
articles_fetched = Column(Integer, default=0)
|
||||
error_message = Column(Text, default="")
|
||||
response_time_ms = Column(Integer, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, index=True)
|
||||
created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc), index=True)
|
||||
|
||||
# 关联
|
||||
feed = relationship("Feed", back_populates="fetch_logs")
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
fastapi>=0.110.0
|
||||
uvicorn[standard]>=0.29.0
|
||||
sqlalchemy>=2.0.0
|
||||
pydantic>=2.6.0
|
||||
feedparser>=6.0.11
|
||||
requests>=2.31.0
|
||||
beautifulsoup4>=4.12.0
|
||||
apscheduler>=3.10.4
|
||||
lxml>=5.1.0
|
||||
fastapi==0.115.0
|
||||
uvicorn[standard]==0.32.0
|
||||
sqlalchemy==2.0.36
|
||||
pydantic==2.9.2
|
||||
feedparser==6.0.11
|
||||
requests==2.32.3
|
||||
beautifulsoup4==4.12.3
|
||||
apscheduler==3.10.4
|
||||
lxml==5.3.0
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""文章管理 API"""
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import desc
|
||||
@@ -130,4 +130,3 @@ def fulltext_search(
|
||||
return {"total": total, "items": results}
|
||||
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""对外 API(供 AI/外部系统调用)"""
|
||||
from typing import Optional
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import desc
|
||||
@@ -21,7 +21,7 @@ def get_recent_articles(
|
||||
"""获取最近 N 小时的文章
|
||||
这是对外提供给 AI 分析的主要接口
|
||||
"""
|
||||
since = datetime.utcnow() - timedelta(hours=hours)
|
||||
since = datetime.now(timezone.utc) - timedelta(hours=hours)
|
||||
|
||||
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
|
||||
|
||||
@@ -136,7 +136,7 @@ def get_daily_summary(
|
||||
except ValueError:
|
||||
return {"error": "Invalid date format, use YYYY-MM-DD"}
|
||||
else:
|
||||
day = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
day = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
next_day = day + timedelta(days=1)
|
||||
|
||||
query = db.query(Article, Feed.title.label("feed_title"), Feed.category.label("category")).join(Feed)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""RSS 源管理 API"""
|
||||
from typing import List, Optional
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from sqlalchemy.orm import Session
|
||||
from database import get_db
|
||||
@@ -103,7 +103,11 @@ def list_categories(db: Session = Depends(get_db)):
|
||||
|
||||
|
||||
@router.post("", response_model=dict)
|
||||
def create_feed(data: FeedCreate, db: Session = Depends(get_db)):
|
||||
def create_feed(
|
||||
data: FeedCreate,
|
||||
background_tasks: BackgroundTasks,
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""添加 RSS 源"""
|
||||
# 检查是否已存在
|
||||
existing = db.query(Feed).filter(Feed.url == data.url).first()
|
||||
@@ -126,10 +130,10 @@ def create_feed(data: FeedCreate, db: Session = Depends(get_db)):
|
||||
if feed.is_active:
|
||||
add_feed_job(feed.id, feed.fetch_interval_minutes)
|
||||
|
||||
# 立即抓取一次
|
||||
fetch_and_store_feed(feed.id)
|
||||
# 后台异步首次抓取,不阻塞 HTTP 响应
|
||||
background_tasks.add_task(fetch_and_store_feed, feed.id)
|
||||
|
||||
return {"id": feed.id, "message": "RSS 源添加成功", "url": feed.url}
|
||||
return {"id": feed.id, "message": "RSS 源添加成功,正在后台抓取", "url": feed.url}
|
||||
|
||||
|
||||
@router.post("/discover")
|
||||
@@ -217,13 +221,25 @@ def trigger_fetch(feed_id: int, db: Session = Depends(get_db)):
|
||||
return result
|
||||
|
||||
|
||||
class OpmlImport(BaseModel):
|
||||
opml_content: str
|
||||
|
||||
|
||||
@router.post("/import-opml")
|
||||
def import_opml(opml_content: str, db: Session = Depends(get_db)):
|
||||
def import_opml(data: OpmlImport, db: Session = Depends(get_db)):
|
||||
"""导入 OPML 文件内容"""
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
content = data.opml_content.strip()
|
||||
if not content:
|
||||
raise HTTPException(status_code=400, detail="OPML 内容不能为空")
|
||||
|
||||
# 限制大小(防止滥用)
|
||||
if len(content) > 5_000_000: # 5MB
|
||||
raise HTTPException(status_code=413, detail="OPML 文件过大")
|
||||
|
||||
try:
|
||||
root = ET.fromstring(opml_content)
|
||||
root = ET.fromstring(content)
|
||||
except ET.ParseError:
|
||||
raise HTTPException(status_code=400, detail="无效的 OPML 文件")
|
||||
|
||||
@@ -261,12 +277,14 @@ def import_opml(opml_content: str, db: Session = Depends(get_db)):
|
||||
@router.get("/export-opml")
|
||||
def export_opml(db: Session = Depends(get_db)):
|
||||
"""导出 OPML 文件内容"""
|
||||
from xml.sax.saxutils import escape
|
||||
feeds = db.query(Feed).all()
|
||||
|
||||
lines = ['<?xml version="1.0" encoding="UTF-8"?>', '<opml version="2.0">', '<head><title>rssKeeper Feeds</title></head>', '<body>']
|
||||
for feed in feeds:
|
||||
title = (feed.title or feed.url).replace('"', '&quot;')
|
||||
lines.append(f' <outline type="rss" text="{title}" xmlUrl="{feed.url}" />')
|
||||
title = escape(feed.title or feed.url, {'"': '&quot;'})
|
||||
url = escape(feed.url)
|
||||
lines.append(f' <outline type="rss" text="{title}" xmlUrl="{url}" />')
|
||||
lines.append('</body>')
|
||||
lines.append('</opml>')
|
||||
|
||||
|
||||
+49
-26
@@ -2,7 +2,6 @@
|
||||
import time
|
||||
import re
|
||||
import html
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from urllib.parse import urljoin
|
||||
@@ -99,16 +98,16 @@ def parse_article(entry, feed_id: int) -> dict:
|
||||
link = entry.get("link", "")
|
||||
author = entry.get("author", "")
|
||||
|
||||
# 发布时间
|
||||
# 发布时间 — 统一存为 UTC aware datetime
|
||||
published_at = None
|
||||
if hasattr(entry, "published_parsed") and entry.published_parsed:
|
||||
try:
|
||||
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc).replace(tzinfo=None)
|
||||
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
if not published_at and hasattr(entry, "updated_parsed") and entry.updated_parsed:
|
||||
try:
|
||||
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc).replace(tzinfo=None)
|
||||
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
@@ -172,9 +171,14 @@ def generate_summary(content: str, max_length: int = 300) -> str:
|
||||
if len(text) <= max_length:
|
||||
return text
|
||||
|
||||
# 在句子边界截断
|
||||
# 在句子边界截断(支持中英文标点)
|
||||
truncated = text[:max_length]
|
||||
last_period = max(truncated.rfind("。"), truncated.rfind(". "), truncated.rfind("! "), truncated.rfind("? "))
|
||||
last_period = max(
|
||||
truncated.rfind("。"), truncated.rfind(". "),
|
||||
truncated.rfind("! "), truncated.rfind("? "),
|
||||
truncated.rfind("?"), truncated.rfind("!"),
|
||||
truncated.rfind(";"),
|
||||
)
|
||||
if last_period > max_length * 0.5:
|
||||
return truncated[:last_period + 1]
|
||||
|
||||
@@ -195,7 +199,7 @@ def fetch_and_store_feed(feed_id: int) -> dict:
|
||||
|
||||
if not result["success"]:
|
||||
# 记录失败
|
||||
feed.last_fetch_at = datetime.utcnow()
|
||||
feed.last_fetch_at = datetime.now(timezone.utc)
|
||||
feed.last_fetch_status = "fail"
|
||||
feed.last_error = result["error"]
|
||||
feed.fail_count += 1
|
||||
@@ -218,34 +222,53 @@ def fetch_and_store_feed(feed_id: int) -> dict:
|
||||
if hasattr(parsed.feed, "description"):
|
||||
feed.description = parsed.feed.description[:1000]
|
||||
|
||||
# 存储文章
|
||||
new_count = 0
|
||||
# 存储文章 — 先收集所有文章,内存去重后批量入库
|
||||
seen_links = set()
|
||||
articles_to_add = []
|
||||
articles_to_update = []
|
||||
|
||||
for entry in parsed.entries:
|
||||
article_data = parse_article(entry, feed_id)
|
||||
if not article_data["link"]:
|
||||
link = article_data.get("link", "")
|
||||
if not link or link in seen_links:
|
||||
continue
|
||||
seen_links.add(link)
|
||||
articles_to_add.append(article_data)
|
||||
|
||||
# 检查是否已存在(基于 link)
|
||||
existing = db.query(Article).filter(Article.link == article_data["link"]).first()
|
||||
if existing:
|
||||
# 更新已有文章
|
||||
existing.title = article_data["title"] or existing.title
|
||||
existing.content = article_data["content"] or existing.content
|
||||
existing.summary = article_data["summary"] or existing.summary
|
||||
existing.author = article_data["author"] or existing.author
|
||||
if article_data["published_at"]:
|
||||
existing.published_at = article_data["published_at"]
|
||||
else:
|
||||
article = Article(**article_data)
|
||||
db.add(article)
|
||||
new_count += 1
|
||||
# 批量查询已有文章
|
||||
if articles_to_add:
|
||||
existing_links = {
|
||||
row[0] for row in db.query(Article.link).filter(
|
||||
Article.link.in_([a["link"] for a in articles_to_add])
|
||||
).all()
|
||||
}
|
||||
|
||||
new_count = 0
|
||||
for article_data in articles_to_add:
|
||||
if article_data["link"] in existing_links:
|
||||
articles_to_update.append(article_data)
|
||||
else:
|
||||
article = Article(**article_data)
|
||||
db.add(article)
|
||||
new_count += 1
|
||||
|
||||
# 更新已有文章
|
||||
for article_data in articles_to_update:
|
||||
existing = db.query(Article).filter(Article.link == article_data["link"]).first()
|
||||
if existing:
|
||||
existing.title = article_data["title"] or existing.title
|
||||
existing.content = article_data["content"] or existing.content
|
||||
existing.summary = article_data["summary"] or existing.summary
|
||||
existing.author = article_data["author"] or existing.author
|
||||
if article_data["published_at"]:
|
||||
existing.published_at = article_data["published_at"]
|
||||
|
||||
# 更新 feed 统计
|
||||
feed.last_fetch_at = datetime.utcnow()
|
||||
feed.last_fetch_at = datetime.now(timezone.utc)
|
||||
feed.last_fetch_status = "success"
|
||||
feed.last_error = ""
|
||||
feed.success_count += 1
|
||||
feed.article_count = db.query(Article).filter(Article.feed_id == feed_id).count()
|
||||
feed.article_count += new_count
|
||||
|
||||
log = FetchLog(
|
||||
feed_id=feed_id,
|
||||
|
||||
Reference in New Issue
Block a user