"""Ingest screenshot files from disk into the database and queue them for analysis."""

from __future__ import annotations

from datetime import datetime
from pathlib import Path
from typing import Iterable, Optional

from PIL import Image
from sqlalchemy import select
from sqlalchemy.orm import Session

from app.core.path_utils import (
    is_accessible_dir,
    is_accessible_file,
    path_from_storage,
    path_to_storage,
)
from app.core.logger import get_logger
from app.models.job import Job, JobKind, JobStatus
from app.models.screenshot import ProcessStatus, Screenshot
from app.models.tag import Tag
from app.services.exif_utils import extract_image_metadata
from app.services.thumbnail import file_hash, generate_thumbnail, is_supported

logger = get_logger(__name__)

# Number of processed files between intermediate commits in ingest_directory.
_COMMIT_BATCH_SIZE = 50

# Tag names are cut to this many characters before lookup/creation.
_MAX_TAG_NAME_LENGTH = 64


def _ingest_file(session: Session, path: Path) -> tuple[Optional[Screenshot], bool]:
    """Ingest one file and report whether a new row was created.

    Args:
        session: Active SQLAlchemy session; rows are added and flushed but
            never committed here.
        path: File to ingest.

    Returns:
        ``(screenshot, created)``:

        * ``(None, False)`` when the file is inaccessible, unsupported, or
          cannot be read.
        * ``(existing, False)`` when a row with the same content hash already
          exists; its stored path is updated if the file has moved.
        * ``(new_row, True)`` when a new Screenshot (plus a pending FULL job)
          was created.
    """
    if not is_accessible_file(path) or not path.is_file():
        return None, False
    if not is_supported(path):
        return None, False

    stored_path = path_to_storage(path)
    try:
        digest = file_hash(path)
    except OSError as exc:
        logger.warning("无法读取文件 %s: %s", path, exc)
        return None, False

    existing = session.scalar(
        select(Screenshot).where(Screenshot.file_hash == digest)
    )
    if existing is not None:
        # Same content under a new name/location: only refresh the path.
        if existing.path != stored_path:
            existing.path = stored_path
            session.flush()
        return existing, False

    try:
        # The file may disappear between hashing and stat (e.g. on a network
        # share); treat that like any other unreadable file instead of
        # aborting the caller's whole scan.
        stat = path.stat()
    except OSError as exc:
        logger.warning("无法读取文件 %s: %s", path, exc)
        return None, False

    try:
        with Image.open(path) as img:
            width, height = img.size
    except Exception as exc:  # noqa: BLE001 - best effort, dimensions are optional
        logger.warning("无法读取图片尺寸 %s: %s", path, exc)
        width, height = 0, 0

    # Fall back to the file's mtime when the metadata carries no capture time.
    captured_at = datetime.fromtimestamp(stat.st_mtime)
    exif_time, location_tags = extract_image_metadata(path)
    if exif_time is not None:
        captured_at = exif_time

    try:
        thumb_path: Optional[str] = generate_thumbnail(path).as_posix()
    except Exception as exc:  # noqa: BLE001 - best effort, thumbnail is optional
        logger.warning("生成缩略图失败 %s: %s", path, exc)
        thumb_path = None

    shot = Screenshot(
        path=stored_path,
        file_hash=digest,
        width=width,
        height=height,
        size=stat.st_size,
        captured_at=captured_at,
        thumb_path=thumb_path,
        ocr_status=ProcessStatus.PENDING.value,
        ai_status=ProcessStatus.PENDING.value,
    )
    session.add(shot)
    # Flush before shot.id is referenced by the Job and the log line below.
    session.flush()

    if location_tags:
        _attach_location_tags(session, shot, location_tags)

    job = Job(
        screenshot_id=shot.id,
        kind=JobKind.FULL.value,
        status=JobStatus.PENDING.value,
    )
    session.add(job)
    logger.info("入库 #%d %s", shot.id, path.name)
    return shot, True


def ingest_path(session: Session, path: Path) -> Optional[Screenshot]:
    """Ingest a single file.

    Args:
        session: Active SQLAlchemy session (not committed by this function).
        path: File to ingest.

    Returns:
        The newly created Screenshot, the already existing Screenshot when a
        row with the same content hash is found (its path is updated if it
        changed), or ``None`` when the file is inaccessible, unsupported, or
        unreadable.
    """
    shot, _created = _ingest_file(session, path)
    return shot


def _attach_location_tags(session: Session, shot: Screenshot, tag_names: list[str]) -> None:
    """Assign location tags to *shot*, creating missing Tag rows.

    Names are stripped and truncated to ``_MAX_TAG_NAME_LENGTH`` characters;
    empty names are ignored. Names that are equal after normalisation are
    attached only once so the same Tag never appears twice in ``shot.tags``.
    The screenshot's tag collection is replaced with the resulting list.
    """
    tag_objs: list[Tag] = []
    seen: set[str] = set()
    for raw in tag_names:
        name = (raw or "").strip()[:_MAX_TAG_NAME_LENGTH]
        if not name or name in seen:
            continue
        seen.add(name)
        tag = session.scalar(select(Tag).where(Tag.name == name))
        if tag is None:
            tag = Tag(name=name)
            session.add(tag)
            session.flush()
        tag_objs.append(tag)
    shot.tags = tag_objs


def ingest_directory(
    session: Session,
    root: Path | str,
    recursive: bool = True,
) -> tuple[int, int]:
    """Walk a directory and ingest every supported file.

    Args:
        session: Active SQLAlchemy session; committed every
            ``_COMMIT_BATCH_SIZE`` processed files and once at the end.
        root: Directory to scan. A ``str`` is converted with
            ``path_from_storage``; a ``Path`` is used as given.
        recursive: Scan sub-directories as well when true.

    Returns:
        ``(added, skipped)``. ``added`` counts files for which a new
        Screenshot row was created; ``skipped`` counts supported files that
        were already known (by content hash) or could not be ingested.
        ``(0, 0)`` when *root* is not an accessible directory.
    """
    root_p = path_from_storage(str(root)) if isinstance(root, str) else root
    if not is_accessible_dir(root_p):
        return 0, 0

    iterator: Iterable[Path]
    if recursive:
        iterator = (p for p in root_p.rglob("*") if p.is_file())
    else:
        iterator = (p for p in root_p.iterdir() if p.is_file())

    added, skipped = 0, 0
    for path in iterator:
        if not is_supported(path):
            continue
        _shot, created = _ingest_file(session, path)
        if created:
            added += 1
        else:
            skipped += 1
        # Commit in batches to avoid one huge transaction.
        if (added + skipped) % _COMMIT_BATCH_SIZE == 0:
            session.commit()

    session.commit()
    return added, skipped