5c028d7952
包含 FastAPI 后端、React 前端、队列/OCR/标签/待办等完整功能。 Co-authored-by: Cursor <cursoragent@cursor.com>
148 lines
4.4 KiB
Python
148 lines
4.4 KiB
Python
"""将磁盘上的截图文件入库 + 排队分析。"""
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Iterable, Optional
|
|
|
|
from PIL import Image
|
|
from sqlalchemy import select
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.core.path_utils import (
|
|
is_accessible_dir,
|
|
is_accessible_file,
|
|
path_from_storage,
|
|
path_to_storage,
|
|
)
|
|
from app.core.logger import get_logger
|
|
from app.models.job import Job, JobKind, JobStatus
|
|
from app.models.screenshot import ProcessStatus, Screenshot
|
|
from app.models.tag import Tag
|
|
from app.services.exif_utils import extract_image_metadata
|
|
from app.services.thumbnail import file_hash, generate_thumbnail, is_supported
|
|
|
|
|
|
logger = get_logger(__name__)
|
|
|
|
|
|
def ingest_path(session: Session, path: Path) -> Optional[Screenshot]:
    """Ingest a single image file and queue it for analysis.

    The file is identified by its content hash. If a ``Screenshot`` with the
    same hash already exists, that row is returned (its ``path`` is updated
    when it differs from the current location) and no new job is queued.
    Otherwise a new ``Screenshot`` is added and flushed, any location tags
    returned by ``extract_image_metadata`` are attached, and a pending
    ``Job`` of kind ``FULL`` is added to the session. The session is flushed
    but never committed here; committing is left to the caller.

    Args:
        session: Open SQLAlchemy session used for lookups and inserts.
        path: Location of the image file on disk.

    Returns:
        The existing or newly created ``Screenshot``, or ``None`` when the
        file is not accessible, is not a supported type, or cannot be read
        (hashing or ``stat`` raises ``OSError``).
    """
    if not is_accessible_file(path) or not path.is_file():
        return None
    if not is_supported(path):
        return None

    stored_path = path_to_storage(path)

    try:
        digest = file_hash(path)
    except OSError as exc:
        logger.warning("无法读取文件 %s: %s", path, exc)
        return None

    existing = session.scalar(select(Screenshot).where(Screenshot.file_hash == digest))
    if existing:
        # Same content seen under a different path (rename/move): repoint the
        # stored path instead of creating a second row.
        if existing.path != stored_path:
            existing.path = stored_path
            session.flush()
        return existing

    # Dimensions are best-effort: an unreadable image is still ingested with
    # a 0x0 size.
    try:
        with Image.open(path) as img:
            width, height = img.size
    except Exception as exc:  # noqa: BLE001
        logger.warning("无法读取图片尺寸 %s: %s", path, exc)
        width, height = 0, 0

    # The file may vanish or become unreadable between hashing and here;
    # treat that like the hashing failure above rather than raising.
    try:
        stat = path.stat()
    except OSError as exc:
        logger.warning("无法读取文件 %s: %s", path, exc)
        return None

    # Fall back to the file's modification time unless the metadata helper
    # supplies a capture time.
    captured_at = datetime.fromtimestamp(stat.st_mtime)
    exif_time, location_tags = extract_image_metadata(path)
    if exif_time is not None:
        captured_at = exif_time

    # Thumbnails are best-effort as well; a failure leaves thumb_path unset.
    try:
        thumb = generate_thumbnail(path)
        thumb_path = thumb.as_posix()
    except Exception as exc:  # noqa: BLE001
        logger.warning("生成缩略图失败 %s: %s", path, exc)
        thumb_path = None

    shot = Screenshot(
        path=stored_path,
        file_hash=digest,
        width=width,
        height=height,
        size=stat.st_size,
        captured_at=captured_at,
        thumb_path=thumb_path,
        ocr_status=ProcessStatus.PENDING.value,
        ai_status=ProcessStatus.PENDING.value,
    )
    session.add(shot)
    # Flush so that shot.id is populated before it is referenced below.
    session.flush()

    if location_tags:
        _attach_location_tags(session, shot, location_tags)

    job = Job(screenshot_id=shot.id, kind=JobKind.FULL.value, status=JobStatus.PENDING.value)
    session.add(job)
    logger.info("入库 #%d %s", shot.id, path.name)
    return shot
|
|
|
|
|
|
def _attach_location_tags(session: Session, shot: Screenshot, tag_names: list[str]) -> None:
    """Attach location tags to a screenshot at ingest time.

    Each raw name is stripped of surrounding whitespace and truncated to 64
    characters; empty results are ignored. A ``Tag`` with the resulting name
    is looked up and created (added and flushed) when missing. The collected
    tags are then assigned to ``shot.tags``, replacing whatever was there.

    Args:
        session: Open SQLAlchemy session used to look up and create tags.
        shot: Screenshot whose ``tags`` collection is overwritten.
        tag_names: Raw tag names; ``None`` or blank entries are skipped.
    """
    tag_objs: list[Tag] = []
    seen_names: set[str] = set()
    for raw in tag_names:
        name = (raw or "").strip()[:64]
        # Skip blanks and names already handled. Distinct raw values can
        # collapse to the same name after stripping/truncation.
        if not name or name in seen_names:
            continue
        seen_names.add(name)

        tag = session.scalar(select(Tag).where(Tag.name == name))
        if tag is None:
            tag = Tag(name=name)
            session.add(tag)
            session.flush()

        # Guard against the lookup returning one row for two different
        # spellings. NOTE(review): presumably shot.tags is a many-to-many
        # collection, where a repeated Tag would be written twice to the
        # association table - confirm against the model.
        if not any(tag is known for known in tag_objs):
            tag_objs.append(tag)
    shot.tags = tag_objs
|
|
|
|
|
|
def ingest_directory(
    session: Session,
    root: Path | str,
    recursive: bool = True,
) -> tuple[int, int]:
    """Walk a directory and ingest every supported file found in it.

    A string ``root`` is converted with ``path_from_storage``; a ``Path`` is
    used as given. The original author notes that UNC network paths are
    supported (presumably through the path helpers - not verifiable here).
    Work is committed every 50 processed files and once more at the end, so
    a large directory does not accumulate into a single huge transaction.

    Args:
        session: Open SQLAlchemy session; it is committed by this function.
        root: Directory to scan.
        recursive: Descend into sub-directories when true, otherwise only
            look at the direct children of ``root``.

    Returns:
        ``(added, skipped)``. A file counts as added when ``ingest_path``
        returned a row and no ``Screenshot`` with the same stored path
        existed beforehand; every other supported file counts as skipped.
        Returns ``(0, 0)`` when ``root`` is not an accessible directory.
    """
    root_p = path_from_storage(str(root)) if isinstance(root, str) else root
    if not is_accessible_dir(root_p):
        return 0, 0

    candidates: Iterable[Path] = root_p.rglob("*") if recursive else root_p.iterdir()

    added, skipped = 0, 0
    for path in candidates:
        if not path.is_file() or not is_supported(path):
            continue

        # NOTE(review): this pre-check is by stored path, so a file that
        # ingest_path matches to an existing row by content hash under a
        # different path is counted as added - confirm that is intended.
        stored = path_to_storage(path)
        before = session.scalar(
            select(Screenshot.id).where(Screenshot.path == stored)
        )
        result = ingest_path(session, path)
        if result is not None and before is None:
            added += 1
        else:
            skipped += 1

        # Commit in batches. The check runs for every processed file,
        # including those ingest_path rejected, so a batch boundary is
        # never stepped over.
        if (added + skipped) % 50 == 0:
            session.commit()
    session.commit()
    return added, skipped
|