fix: 端口更换 & 代码审核修复
端口:
- 服务端口 8000 → 7329
- 前端开发端口 5173 → 7330

安全:
- CORS 收紧为白名单,关闭 credentials
- SPA 路由白名单完善
- 前端 XSS 转义

可靠性:
- 时区统一为 datetime.now(timezone.utc)
- 文章入库改为内存去重 + 增量计数
- OPML 导入改为 body 参数接收
- OPML 导出 URL XML 转义
- 首次抓取改为 BackgroundTasks 异步
- articles.py HTTPException 移到顶部 import
- FTS5 异常显式日志
- FTS5 查询加引号包裹防布尔注入
- 中文摘要支持中文标点
- 去掉未使用的 hashlib import

部署:
- Dockerfile 锁 python:3.12.7-slim
- requirements 锁定具体版本
- healthcheck 不用 curl(镜像里没有)
- docker-compose 使用 .env 文件
- 新增 .env 配置文件
This commit is contained in:
+49
-26
@@ -2,7 +2,6 @@
|
||||
import time
|
||||
import re
|
||||
import html
|
||||
import hashlib
|
||||
from datetime import datetime, timezone
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from urllib.parse import urljoin
|
||||
@@ -99,16 +98,16 @@ def parse_article(entry, feed_id: int) -> dict:
|
||||
link = entry.get("link", "")
|
||||
author = entry.get("author", "")
|
||||
|
||||
# 发布时间
|
||||
# 发布时间 — 统一存为 UTC aware datetime
|
||||
published_at = None
|
||||
if hasattr(entry, "published_parsed") and entry.published_parsed:
|
||||
try:
|
||||
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc).replace(tzinfo=None)
|
||||
published_at = datetime(*entry.published_parsed[:6], tzinfo=timezone.utc)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
if not published_at and hasattr(entry, "updated_parsed") and entry.updated_parsed:
|
||||
try:
|
||||
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc).replace(tzinfo=None)
|
||||
published_at = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
@@ -172,9 +171,14 @@ def generate_summary(content: str, max_length: int = 300) -> str:
|
||||
if len(text) <= max_length:
|
||||
return text
|
||||
|
||||
# 在句子边界截断
|
||||
# 在句子边界截断(支持中英文标点)
|
||||
truncated = text[:max_length]
|
||||
last_period = max(truncated.rfind("。"), truncated.rfind(". "), truncated.rfind("! "), truncated.rfind("? "))
|
||||
last_period = max(
|
||||
truncated.rfind("。"), truncated.rfind(". "),
|
||||
truncated.rfind("! "), truncated.rfind("? "),
|
||||
truncated.rfind("?"), truncated.rfind("!"),
|
||||
truncated.rfind(";"),
|
||||
)
|
||||
if last_period > max_length * 0.5:
|
||||
return truncated[:last_period + 1]
|
||||
|
||||
@@ -195,7 +199,7 @@ def fetch_and_store_feed(feed_id: int) -> dict:
|
||||
|
||||
if not result["success"]:
|
||||
# 记录失败
|
||||
feed.last_fetch_at = datetime.utcnow()
|
||||
feed.last_fetch_at = datetime.now(timezone.utc)
|
||||
feed.last_fetch_status = "fail"
|
||||
feed.last_error = result["error"]
|
||||
feed.fail_count += 1
|
||||
@@ -218,34 +222,53 @@ def fetch_and_store_feed(feed_id: int) -> dict:
|
||||
if hasattr(parsed.feed, "description"):
|
||||
feed.description = parsed.feed.description[:1000]
|
||||
|
||||
# 存储文章
|
||||
new_count = 0
|
||||
# 存储文章 — 先收集所有文章,内存去重后批量入库
|
||||
seen_links = set()
|
||||
articles_to_add = []
|
||||
articles_to_update = []
|
||||
|
||||
for entry in parsed.entries:
|
||||
article_data = parse_article(entry, feed_id)
|
||||
if not article_data["link"]:
|
||||
link = article_data.get("link", "")
|
||||
if not link or link in seen_links:
|
||||
continue
|
||||
seen_links.add(link)
|
||||
articles_to_add.append(article_data)
|
||||
|
||||
# 检查是否已存在(基于 link)
|
||||
existing = db.query(Article).filter(Article.link == article_data["link"]).first()
|
||||
if existing:
|
||||
# 更新已有文章
|
||||
existing.title = article_data["title"] or existing.title
|
||||
existing.content = article_data["content"] or existing.content
|
||||
existing.summary = article_data["summary"] or existing.summary
|
||||
existing.author = article_data["author"] or existing.author
|
||||
if article_data["published_at"]:
|
||||
existing.published_at = article_data["published_at"]
|
||||
else:
|
||||
article = Article(**article_data)
|
||||
db.add(article)
|
||||
new_count += 1
|
||||
# 批量查询已有文章
|
||||
if articles_to_add:
|
||||
existing_links = {
|
||||
row[0] for row in db.query(Article.link).filter(
|
||||
Article.link.in_([a["link"] for a in articles_to_add])
|
||||
).all()
|
||||
}
|
||||
|
||||
new_count = 0
|
||||
for article_data in articles_to_add:
|
||||
if article_data["link"] in existing_links:
|
||||
articles_to_update.append(article_data)
|
||||
else:
|
||||
article = Article(**article_data)
|
||||
db.add(article)
|
||||
new_count += 1
|
||||
|
||||
# 更新已有文章
|
||||
for article_data in articles_to_update:
|
||||
existing = db.query(Article).filter(Article.link == article_data["link"]).first()
|
||||
if existing:
|
||||
existing.title = article_data["title"] or existing.title
|
||||
existing.content = article_data["content"] or existing.content
|
||||
existing.summary = article_data["summary"] or existing.summary
|
||||
existing.author = article_data["author"] or existing.author
|
||||
if article_data["published_at"]:
|
||||
existing.published_at = article_data["published_at"]
|
||||
|
||||
# 更新 feed 统计
|
||||
feed.last_fetch_at = datetime.utcnow()
|
||||
feed.last_fetch_at = datetime.now(timezone.utc)
|
||||
feed.last_fetch_status = "success"
|
||||
feed.last_error = ""
|
||||
feed.success_count += 1
|
||||
feed.article_count = db.query(Article).filter(Article.feed_id == feed_id).count()
|
||||
feed.article_count += new_count
|
||||
|
||||
log = FetchLog(
|
||||
feed_id=feed_id,
|
||||
|
||||
Reference in New Issue
Block a user