fix: 端口更换 & 代码审核修复

端口:
- 服务端口 8000 → 7329
- 前端开发端口 5173 → 7330

安全:
- CORS 收紧为白名单,关闭 credentials
- SPA 路由白名单完善
- 前端 XSS 转义

可靠性:
- 时区统一为 datetime.now(timezone.utc)
- 文章入库改为内存去重 + 增量计数
- OPML 导入改为 body 参数接收
- OPML 导出 URL XML 转义
- 首次抓取改为 BackgroundTasks 异步
- articles.py HTTPException 移到顶部 import
- FTS5 异常显式日志
- FTS5 查询加引号包裹防布尔注入
- 中文摘要支持中文标点
- 去掉未使用的 hashlib import

部署:
- Dockerfile 基础镜像锁定为 python:3.12.7-slim
- requirements 锁定具体版本
- healthcheck 不再依赖 curl(镜像中未安装 curl)
- docker-compose 使用 .env 文件
- 新增 .env 配置文件
This commit is contained in:
congsh
2026-06-11 14:31:29 +08:00
parent 54e7db0ef0
commit c59dd304f7
17 changed files with 701 additions and 106 deletions
+49 -26
View File
@@ -2,7 +2,6 @@
import time
import re
import html
import hashlib
from datetime import datetime, timezone
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urljoin
@@ -99,16 +98,16 @@ def parse_article(entry, feed_id: int) -> dict:
link = entry.get("link", "")
author = entry.get("author", "")
# 发布时间
# 发布时间 — 统一存为 UTC aware datetime
published_at = None
if hasattr(entry, "published_parsed") and entry.published_parsed:
try:
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc).replace(tzinfo=None)
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc)
except (ValueError, TypeError):
pass
if not published_at and hasattr(entry, "updated_parsed") and entry.updated_parsed:
try:
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc).replace(tzinfo=None)
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc)
except (ValueError, TypeError):
pass
@@ -172,9 +171,14 @@ def generate_summary(content: str, max_length: int = 300) -> str:
if len(text) <= max_length:
return text
# 在句子边界截断
# 在句子边界截断(支持中英文标点)
truncated = text[:max_length]
last_period = max(truncated.rfind(""), truncated.rfind(". "), truncated.rfind("! "), truncated.rfind("? "))
last_period = max(
truncated.rfind(""), truncated.rfind(". "),
truncated.rfind("! "), truncated.rfind("? "),
truncated.rfind(""), truncated.rfind(""),
truncated.rfind(""),
)
if last_period > max_length * 0.5:
return truncated[:last_period + 1]
@@ -195,7 +199,7 @@ def fetch_and_store_feed(feed_id: int) -> dict:
if not result["success"]:
# 记录失败
feed.last_fetch_at = datetime.utcnow()
feed.last_fetch_at = datetime.now(timezone.utc)
feed.last_fetch_status = "fail"
feed.last_error = result["error"]
feed.fail_count += 1
@@ -218,34 +222,53 @@ def fetch_and_store_feed(feed_id: int) -> dict:
if hasattr(parsed.feed, "description"):
feed.description = parsed.feed.description[:1000]
# 存储文章
new_count = 0
# 存储文章 — 先收集所有文章,内存去重后批量入库
seen_links = set()
articles_to_add = []
articles_to_update = []
for entry in parsed.entries:
article_data = parse_article(entry, feed_id)
if not article_data["link"]:
link = article_data.get("link", "")
if not link or link in seen_links:
continue
seen_links.add(link)
articles_to_add.append(article_data)
# 检查是否已存在(基于 link)
existing = db.query(Article).filter(Article.link == article_data["link"]).first()
if existing:
# 更新已有文章
existing.title = article_data["title"] or existing.title
existing.content = article_data["content"] or existing.content
existing.summary = article_data["summary"] or existing.summary
existing.author = article_data["author"] or existing.author
if article_data["published_at"]:
existing.published_at = article_data["published_at"]
else:
article = Article(**article_data)
db.add(article)
new_count += 1
# 批量查询已有文章
if articles_to_add:
existing_links = {
row[0] for row in db.query(Article.link).filter(
Article.link.in_([a["link"] for a in articles_to_add])
).all()
}
new_count = 0
for article_data in articles_to_add:
if article_data["link"] in existing_links:
articles_to_update.append(article_data)
else:
article = Article(**article_data)
db.add(article)
new_count += 1
# 更新已有文章
for article_data in articles_to_update:
existing = db.query(Article).filter(Article.link == article_data["link"]).first()
if existing:
existing.title = article_data["title"] or existing.title
existing.content = article_data["content"] or existing.content
existing.summary = article_data["summary"] or existing.summary
existing.author = article_data["author"] or existing.author
if article_data["published_at"]:
existing.published_at = article_data["published_at"]
# 更新 feed 统计
feed.last_fetch_at = datetime.utcnow()
feed.last_fetch_at = datetime.now(timezone.utc)
feed.last_fetch_status = "success"
feed.last_error = ""
feed.success_count += 1
feed.article_count = db.query(Article).filter(Article.feed_id == feed_id).count()
feed.article_count += new_count
log = FetchLog(
feed_id=feed_id,