from __future__ import annotations

import hashlib
import os
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path

from src.errors import UserError
from src.ingest.manifest import Manifest, load_manifest, new_manifest, save_manifest_atomic


# File extensions (lower-cased, leading dot) that ingest will consider.
# Compared against Path.suffix.lower(), so matching is case-insensitive.
# frozenset: this constant is only ever used for membership tests, so make
# it immutable to guard against accidental mutation at runtime.
ALLOWED_EXTENSIONS = frozenset({
    ".jpg",
    ".jpeg",
    ".png",
    ".webp",
    ".tif",
    ".tiff",
    ".heic",
})


@dataclass(frozen=True)
class IngestResult:
    """Immutable summary of a single ingest pass over an inbox directory."""

    inbox_dir: Path
    out_dir: Path
    manifest_path: Path
    dry_run: bool
    files_found: int
    files_eligible: int
    files_new: int
    files_skipped_existing: int
    files_skipped_duplicate_in_run: int

    def to_human_summary(self) -> str:
        """Render this result as a multi-line, human-readable report."""
        header = "DRY-RUN" if self.dry_run else "RUN"
        report_lines = [
            f"{header} ingest summary",
            f"- inbox: {self.inbox_dir}",
            f"- out: {self.out_dir}",
            f"- manifest: {self.manifest_path}",
            f"- found: {self.files_found}",
            f"- eligible: {self.files_eligible}",
            f"- new: {self.files_new}",
            f"- skipped_existing: {self.files_skipped_existing}",
            f"- skipped_duplicate_in_run: {self.files_skipped_duplicate_in_run}",
        ]
        return "\n".join(report_lines)


@dataclass(frozen=True)
class _ScanResult:
    """Internal tally of one inbox scan, plus the manifest entries to add.

    Produced by _scan_inbox without mutating the manifest; a real (non
    dry-run) ingest later feeds pending_additions to _apply_additions.
    """

    # Total files encountered while walking the inbox (any extension).
    files_found: int
    # Subset of files_found whose extension is in ALLOWED_EXTENSIONS.
    files_eligible: int
    # Eligible files whose content hash is not yet in the manifest.
    files_new: int
    # Eligible files already recorded in the manifest by a prior run.
    files_skipped_existing: int
    # Eligible files whose content duplicates an earlier file in this run.
    files_skipped_duplicate_in_run: int
    # New manifest entries keyed by SHA-256 hex digest, not yet applied.
    pending_additions: dict[str, dict[str, object]]


def ingest_inbox(*, inbox_dir: Path, out_dir: Path, dry_run: bool) -> IngestResult:
    """Ingest new files from *inbox_dir*, tracked in out_dir/manifest.json.

    Scans the inbox, dedupes by content hash against the manifest, and —
    unless *dry_run* is true — records the new entries and saves the
    manifest atomically. Returns a summary of what happened (or, in
    dry-run mode, what would have happened).

    Raises UserError (via helpers) on an invalid inbox or out path.
    """
    _validate_inbox_dir(inbox_dir)
    manifest_path = out_dir / "manifest.json"
    manifest = _load_or_init_manifest(manifest_path, inbox_dir, out_dir, dry_run)
    scan = _scan_inbox(inbox_dir, manifest)

    if not dry_run:
        # Only a real run touches disk; dry-run leaves the manifest as-is.
        _apply_additions(manifest, inbox_dir, scan.pending_additions)
        save_manifest_atomic(manifest_path, manifest)

    counters = {
        "files_found": scan.files_found,
        "files_eligible": scan.files_eligible,
        "files_new": scan.files_new,
        "files_skipped_existing": scan.files_skipped_existing,
        "files_skipped_duplicate_in_run": scan.files_skipped_duplicate_in_run,
    }
    return IngestResult(
        inbox_dir=inbox_dir,
        out_dir=out_dir,
        manifest_path=manifest_path,
        dry_run=dry_run,
        **counters,
    )


def _validate_inbox_dir(inbox_dir: Path) -> None:
    if not inbox_dir.exists():
        raise UserError(f"Inbox path does not exist: {inbox_dir}")
    if not inbox_dir.is_dir():
        raise UserError(f"Inbox path is not a directory: {inbox_dir}")


def _load_or_init_manifest(
    manifest_path: Path,
    inbox_dir: Path,
    out_dir: Path,
    dry_run: bool,
) -> Manifest:
    """Return the on-disk manifest if present, otherwise a fresh one.

    Validates that *out_dir* is not an existing non-directory, and — only
    on a real run — creates it (parents included). Dry-run deliberately
    skips directory creation so nothing touches disk.

    Raises UserError when the out path is invalid or cannot be created.
    """
    if out_dir.exists() and not out_dir.is_dir():
        raise UserError(f"Out path exists but is not a directory: {out_dir}")
    if not dry_run:
        try:
            out_dir.mkdir(parents=True, exist_ok=True)
        except OSError as exc:
            raise UserError(f"Failed to create out directory: {out_dir} ({exc})") from exc
    if manifest_path.exists():
        return load_manifest(manifest_path)
    return new_manifest(inbox_dir)


def _scan_inbox(inbox_dir: Path, manifest: Manifest) -> _ScanResult:
    """Walk the inbox once and classify every file, without mutating anything.

    Hashes only files with an allowed extension; dedupes first within this
    run, then against the manifest. Returns counts plus the entries that a
    real run would add to the manifest.
    """
    found = eligible = new = existing = dupes = 0
    hashes_seen: set[str] = set()
    additions: dict[str, dict[str, object]] = {}

    for candidate in _iter_inbox_files(inbox_dir):
        found += 1
        if candidate.suffix.lower() not in ALLOWED_EXTENSIONS:
            continue
        eligible += 1

        content_hash = _sha256_file(candidate)
        if content_hash in hashes_seen:
            # Same bytes appeared earlier in this very run.
            dupes += 1
            continue
        hashes_seen.add(content_hash)
        if content_hash in manifest.files:
            # Already ingested by a previous run.
            existing += 1
            continue

        additions[content_hash] = _build_manifest_entry(candidate, inbox_dir)
        new += 1

    return _ScanResult(
        files_found=found,
        files_eligible=eligible,
        files_new=new,
        files_skipped_existing=existing,
        files_skipped_duplicate_in_run=dupes,
        pending_additions=additions,
    )


def _build_manifest_entry(path: Path, inbox_dir: Path) -> dict[str, object]:
    """Return the per-file manifest payload: relpath, size, and mtime.

    Raises UserError when the file cannot be stat'ed.
    """
    try:
        info = path.stat()
    except OSError as exc:
        raise UserError(f"Failed to stat file: {path} ({exc})") from exc
    entry: dict[str, object] = {
        "source_relpath": _safe_relpath(path, inbox_dir),
        "size_bytes": info.st_size,
    }
    # Truncate to whole seconds for a stable, JSON-friendly value.
    entry["mtime_epoch"] = int(info.st_mtime)
    return entry


def _iter_inbox_files(inbox_dir: Path) -> list[Path]:
    """Collect every file under *inbox_dir* recursively, sorted for determinism.

    Walk errors are fatal via _walk_onerror rather than silently skipped.
    """
    collected = [
        Path(dirpath) / filename
        for dirpath, _subdirs, filenames in os.walk(inbox_dir, onerror=_walk_onerror)
        for filename in filenames
    ]
    collected.sort()
    return collected


def _walk_onerror(exc: OSError) -> None:
    """os.walk onerror hook: escalate the OSError to a fatal UserError.

    os.walk ignores listing errors by default; raising here makes an
    unreadable directory abort the ingest instead of being skipped silently.
    """
    raise UserError(f"Failed to walk inbox directory: {exc.filename} ({exc})") from exc


def _sha256_file(path: Path) -> str:
    h = hashlib.sha256()
    try:
        with path.open("rb") as f:
            while True:
                chunk = f.read(1024 * 1024)
                if not chunk:
                    break
                h.update(chunk)
    except OSError as exc:
        raise UserError(f"Failed to read file for hashing: {path} ({exc})") from exc
    return h.hexdigest()


def _safe_relpath(path: Path, root: Path) -> str:
    try:
        rel = path.relative_to(root)
    except ValueError:
        rel = Path(os.path.relpath(path, root))
    return rel.as_posix()


def _apply_additions(
    manifest: Manifest,
    inbox_root: Path,
    additions: dict[str, dict[str, object]],
) -> None:
    now = datetime.now(timezone.utc).isoformat()
    manifest.updated_at = now
    manifest.inbox_root = inbox_root.as_posix()
    for digest, meta in additions.items():
        manifest.files[digest] = meta | {"first_seen_at": now}
