Initial commit: snapAna 截图智能整理工具

包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
wjl
2026-05-27 15:45:50 +08:00
commit 5c028d7952
76 changed files with 10467 additions and 0 deletions
View File
+66
View File
@@ -0,0 +1,66 @@
"""全局配置:路径、数据库、并发参数等。"""
from __future__ import annotations
import os
from pathlib import Path
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
# Default data directory: placed under backend/.data so the app can start with zero configuration.
# _BACKEND_ROOT is the third ancestor directory of this file.
_BACKEND_ROOT = Path(__file__).resolve().parents[2]
_DEFAULT_DATA_DIR = _BACKEND_ROOT / ".data"
class Settings(BaseSettings):
    """Application-wide configuration loaded from ``.env`` and environment variables.

    The ``.env`` file is looked up in the backend root directory; unknown keys
    are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_file=str(_BACKEND_ROOT / ".env"),
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- Application basics ---
    app_name: str = "snapAna"
    debug: bool = False
    host: str = "127.0.0.1"
    port: int = 8765

    # --- Data directory ---
    data_dir: Path = Field(default=_DEFAULT_DATA_DIR)

    # --- Task concurrency ---
    analyze_concurrency: int = 4
    max_retries: int = 3

    # --- Thumbnails ---
    thumb_size: int = 320
    vlm_max_side: int = 1280  # long-side pixel size images are shrunk to before upload to the VLM

    # --- CORS ---
    cors_origins: list[str] = ["http://localhost:5173", "http://127.0.0.1:5173"]

    @property
    def db_path(self) -> Path:
        """Location of the SQLite database file inside ``data_dir``."""
        return self.data_dir.joinpath("snapana.db")

    @property
    def db_url(self) -> str:
        """SQLAlchemy connection URL pointing at ``db_path``."""
        posix_path = self.db_path.as_posix()
        return "sqlite:///" + posix_path

    @property
    def thumb_dir(self) -> Path:
        """Directory used to cache generated thumbnails."""
        return self.data_dir.joinpath("thumbs")

    def ensure_dirs(self) -> None:
        """Create every runtime directory (data dir first, then thumbnails) if missing."""
        for directory in (self.data_dir, self.thumb_dir):
            directory.mkdir(parents=True, exist_ok=True)
# Module-level settings singleton. NOTE: the runtime directories are created
# as a side effect of importing this module.
settings = Settings()
settings.ensure_dirs()
+153
View File
@@ -0,0 +1,153 @@
"""数据库引擎、会话与初始化。
使用 SQLAlchemy 2.0 + SQLite。FTS5 虚拟表通过原生 SQL 创建,并配套触发器
让 OCR/AI 字段更新时自动同步到全文索引。
"""
from __future__ import annotations
from contextlib import contextmanager
from typing import Iterator
from sqlalchemy import create_engine, event, text
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
from app.core.config import settings
class Base(DeclarativeBase):
    """Project-wide declarative base class for ORM models."""
# Single shared engine built from the configured SQLite URL.
# check_same_thread=False lets the underlying sqlite3 connection be used from
# threads other than the one that created it.
engine = create_engine(
    settings.db_url,
    echo=False,
    future=True,
    connect_args={"check_same_thread": False},
)
@event.listens_for(engine, "connect")
def _sqlite_pragmas(dbapi_connection, _connection_record):
    """Apply SQLite PRAGMAs to every newly opened DBAPI connection.

    Enables foreign-key enforcement, WAL journaling, ``synchronous=NORMAL``
    and a 5000 ms busy timeout.

    Args:
        dbapi_connection: The raw DBAPI connection that was just opened.
        _connection_record: Pool record supplied by SQLAlchemy; unused.
    """
    pragmas = (
        "PRAGMA foreign_keys=ON",
        "PRAGMA journal_mode=WAL",
        "PRAGMA synchronous=NORMAL",
        "PRAGMA busy_timeout=5000",
    )
    cursor = dbapi_connection.cursor()
    try:
        for pragma in pragmas:
            cursor.execute(pragma)
    finally:
        # Always release the cursor, even if one of the PRAGMAs fails.
        cursor.close()
# Session factory bound to the shared engine; autoflush and autocommit are disabled,
# so callers must flush/commit explicitly.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False, future=True)
def get_session() -> Iterator[Session]:
    """FastAPI dependency: yield one session per request and close it afterwards.

    No commit is issued here; the session is only closed once the consumer
    is done with it.
    """
    db_session = SessionLocal()
    try:
        yield db_session
    finally:
        db_session.close()
@contextmanager
def session_scope() -> Iterator[Session]:
    """Context manager providing a session with automatic commit/rollback.

    Commits when the managed block finishes normally; on any ``Exception``
    (including one raised by the commit itself) rolls back and re-raises.
    The session is closed in every case.
    """
    # The ``with`` statement takes care of closing the session on exit.
    with SessionLocal() as db_session:
        try:
            yield db_session
            db_session.commit()
        except Exception:
            db_session.rollback()
            raise
# FTS5 virtual table and trigger SQL (kept as a separate list so the indexed
# fields are easy to adjust later). Every statement uses IF NOT EXISTS, so the
# list can be executed repeatedly.
_FTS_SCHEMA_SQL = [
    # 1) External-content FTS5 table over four text columns of screenshot_meta,
    #    keyed by screenshot_id and tokenized with unicode61.
"""
CREATE VIRTUAL TABLE IF NOT EXISTS screenshots_fts
USING fts5(
ocr_text,
ai_title,
ai_summary,
ai_suggestion,
content='screenshot_meta',
content_rowid='screenshot_id',
tokenize='unicode61'
);
""",
    # 2) After INSERT: add the new row to the index (NULLs become empty strings).
"""
CREATE TRIGGER IF NOT EXISTS screenshot_meta_ai
AFTER INSERT ON screenshot_meta BEGIN
INSERT INTO screenshots_fts(rowid, ocr_text, ai_title, ai_summary, ai_suggestion)
VALUES (new.screenshot_id,
coalesce(new.ocr_text, ''),
coalesce(new.ai_title, ''),
coalesce(new.ai_summary, ''),
coalesce(new.ai_suggestion, ''));
END;
""",
    # 3) After DELETE: issue the FTS5 'delete' command with the old column values.
"""
CREATE TRIGGER IF NOT EXISTS screenshot_meta_ad
AFTER DELETE ON screenshot_meta BEGIN
INSERT INTO screenshots_fts(screenshots_fts, rowid, ocr_text, ai_title, ai_summary, ai_suggestion)
VALUES('delete', old.screenshot_id,
coalesce(old.ocr_text, ''),
coalesce(old.ai_title, ''),
coalesce(old.ai_summary, ''),
coalesce(old.ai_suggestion, ''));
END;
""",
    # 4) After UPDATE: remove the old index entry, then insert the new values.
"""
CREATE TRIGGER IF NOT EXISTS screenshot_meta_au
AFTER UPDATE ON screenshot_meta BEGIN
INSERT INTO screenshots_fts(screenshots_fts, rowid, ocr_text, ai_title, ai_summary, ai_suggestion)
VALUES('delete', old.screenshot_id,
coalesce(old.ocr_text, ''),
coalesce(old.ai_title, ''),
coalesce(old.ai_summary, ''),
coalesce(old.ai_suggestion, ''));
INSERT INTO screenshots_fts(rowid, ocr_text, ai_title, ai_summary, ai_suggestion)
VALUES (new.screenshot_id,
coalesce(new.ocr_text, ''),
coalesce(new.ai_title, ''),
coalesce(new.ai_summary, ''),
coalesce(new.ai_suggestion, ''));
END;
""",
]
def init_db() -> None:
    """Create tables at startup, install the FTS5 schema and seed default categories.

    Steps, in order: call ``register_all()``, create all tables on
    ``Base.metadata``, execute the FTS5 table/trigger statements and the
    legacy-schema cleanup in one transaction, then seed default categories.
    """
    # Imported lazily inside the function; presumably registers the model
    # modules so their tables are known to Base.metadata — TODO confirm.
    from app.models import register_all  # noqa: F401

    register_all()
    Base.metadata.create_all(engine)
    # engine.begin() commits on success and rolls back on error.
    with engine.begin() as conn:
        for stmt in _FTS_SCHEMA_SQL:
            conn.execute(text(stmt))
        _migrate_legacy_schema(conn)
    # Seed default categories at startup (so that even on first launch the
    # categories show up in the "Settings" / filter pages).
    from app.services.analyze import ensure_default_categories

    ensure_default_categories()
def _migrate_legacy_schema(conn) -> None:
    """Lightweight migration for databases whose ``screenshots.category_id`` has no foreign key.

    SQLite cannot add a foreign key via ``ALTER TABLE``, so on such legacy
    tables ``ON DELETE SET NULL`` never fires and deleting a category leaves
    dangling references. When no foreign key from ``screenshots`` to
    ``categories`` is found, dangling ``category_id`` values are reset to
    ``NULL`` and the number of cleaned rows is logged.

    Args:
        conn: An open SQLAlchemy connection; the statements run inside the
            caller's transaction.
    """
    import logging

    fk_rows = conn.execute(
        text("PRAGMA foreign_key_list(screenshots)")
    ).fetchall()
    # Column index 2 of PRAGMA foreign_key_list is the referenced table name.
    if any(row[2] == "categories" for row in fk_rows):
        return

    # Clear dangling category_id values so list statistics stay correct.
    result = conn.execute(
        text(
            "UPDATE screenshots SET category_id = NULL "
            "WHERE category_id IS NOT NULL "
            "AND category_id NOT IN (SELECT id FROM categories)"
        )
    )
    cleaned = result.rowcount
    # rowcount may be -1 when the driver cannot report it; only log real work.
    if cleaned and cleaned > 0:
        logging.getLogger(__name__).warning(
            "Legacy schema: screenshots.category_id has no foreign key; "
            "reset %d dangling category reference(s) to NULL",
            cleaned,
        )
+25
View File
@@ -0,0 +1,25 @@
"""统一日志配置。"""
from __future__ import annotations
import logging
import sys
def setup_logging(debug: bool = False) -> None:
    """Configure the root logger's handler, format and level.

    Any handlers already attached to the root logger are dropped and replaced
    by a single stdout stream handler.

    Args:
        debug: When true the root level is DEBUG, otherwise INFO.
    """
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s | %(levelname)-7s | %(name)s | %(message)s")
    )

    root_logger = logging.getLogger()
    root_logger.handlers.clear()
    root_logger.addHandler(stream_handler)
    root_logger.setLevel(logging.DEBUG if debug else logging.INFO)

    # Quieten chatty third-party libraries.
    for third_party in ("watchdog", "httpx", "PIL"):
        logging.getLogger(third_party).setLevel(logging.WARNING)
def get_logger(name: str) -> logging.Logger:
    """Single entry point for obtaining a named logger."""
    named_logger = logging.getLogger(name)
    return named_logger
+102
View File
@@ -0,0 +1,102 @@
"""跨平台路径工具:重点兼容 Windows UNC 网络路径(\\\\NAS\\share\\...)。"""
from __future__ import annotations
import os
import sys
from pathlib import Path, PureWindowsPath
def normalize_user_path(raw: str) -> str:
    r"""Normalize a user-supplied path, keeping the UNC backslash form on Windows.

    Surrounding whitespace and quote characters are stripped first; an empty
    result is returned unchanged. On non-Windows platforms only ``~`` is expanded.

    Examples (Windows):
        - \\JIULUGNAS\personal_folder\Photos -> kept as is
        - //JIULUGNAS/personal_folder/Photos -> converted to UNC
        - D:/Pictures/Screenshots            -> D:\Pictures\Screenshots
    """
    cleaned = (raw or "").strip().strip('"').strip("'")
    if not cleaned:
        return cleaned

    if sys.platform != "win32":
        return str(Path(cleaned).expanduser())

    # //server/share -> \\server\share (exactly two leading slashes only)
    is_forward_unc = cleaned.startswith("//") and not cleaned.startswith("///")
    if is_forward_unc:
        cleaned = "\\\\" + cleaned.lstrip("/").replace("/", "\\")
    elif not cleaned.startswith("\\\\"):
        # Ordinary path: just flip the separators. Real UNC input is left alone.
        cleaned = cleaned.replace("/", "\\")
    return str(PureWindowsPath(cleaned))
def path_from_storage(stored: str) -> Path:
    """Turn a path string read from the database into a ``Path``.

    On Windows, historical values written via ``as_posix`` (``//NAS/...``)
    are repaired back into UNC form before conversion.
    """
    needs_unc_repair = (
        bool(stored)
        and sys.platform == "win32"
        and stored.startswith("//")
        and not stored.startswith("///")
    )
    if needs_unc_repair:
        # Legacy data: //JIULUGNAS/foo/bar -> \\JIULUGNAS\foo\bar
        stored = "\\\\" + stored.lstrip("/").replace("/", "\\")
    return Path(stored)
def path_to_storage(path: Path | str) -> str:
"""写入数据库 / 比较用的路径字符串;Windows 下保留反斜杠。"""
if isinstance(path, Path):
if sys.platform == "win32":
return str(path)
return path.as_posix()
return normalize_user_path(str(path)) if sys.platform == "win32" else str(path)
def is_accessible_dir(path: str | Path) -> bool:
"""目录是否可访问(UNC / 本地均适用)。"""
try:
return os.path.isdir(str(path))
except OSError:
return False
def is_accessible_file(path: str | Path) -> bool:
"""文件是否可访问。"""
try:
return os.path.isfile(str(path))
except OSError:
return False
def path_is_under(parent: str | Path, child: str | Path) -> bool:
"""判断 child 是否在 parent 目录下(用于敏感目录检测)。"""
try:
parent_norm = os.path.normcase(os.path.normpath(str(parent)))
child_norm = os.path.normcase(os.path.normpath(str(child)))
if not parent_norm.endswith(os.sep):
parent_norm += os.sep
return child_norm.startswith(parent_norm) or child_norm == parent_norm.rstrip(os.sep)
except OSError:
return False
def count_files_sample(root: str | Path, limit: int = 5) -> tuple[int, list[str]]:
    """Quickly count supported image files under ``root`` and collect a few sample paths.

    Network paths can be slow, so the walk stops as soon as 1000 supported
    files have been counted; the returned total is therefore at most 1000.

    Args:
        root: Directory to scan; a string is first converted with
            ``path_from_storage``.
        limit: Maximum number of sample paths to return (it does not limit
            the traversal).

    Returns:
        ``(total, samples)``: the number of supported files seen (capped at
        1000) and up to ``limit`` of their storage-form paths.
    """
    from app.services.thumbnail import is_supported

    max_count = 1000  # hard cap on counted files, keeps slow shares responsive
    root_p = path_from_storage(str(root)) if isinstance(root, str) else root
    total = 0
    samples: list[str] = []
    try:
        for dirpath, _, filenames in os.walk(str(root_p)):
            for name in filenames:
                p = Path(dirpath) / name
                if not is_supported(p):
                    continue
                total += 1
                if len(samples) < limit:
                    samples.append(path_to_storage(p))
                if total >= max_count:
                    # Return from both loops at once so the walk really stops here.
                    return total, samples
    except OSError:
        # Best effort: an unreadable location yields whatever was counted so far.
        pass
    return total, samples