"""Audit-run orchestration: ingest inbox photos, optionally match them to
deals, check derived image outputs, and write JSON audit artifacts."""

from __future__ import annotations

import hashlib
import json
import os
from contextlib import suppress
from dataclasses import asdict, dataclass
from pathlib import Path

from src.errors import UserError
from src.images.paths import gbp_dest_paths, website_dest_path
from src.ingest.ingest import ingest_inbox
from src.matching.io import iter_image_files, validate_inbox_dir
from src.reporting.matching import MatchInfo, match_inbox_photos
from src.reporting.run_id import new_run_id


@dataclass(frozen=True)
class AuditRow:
    """One audit record per inbox photo: content hash, source location,
    match details, and the presence/size of each derived output."""

    photo_sha256: str
    source_path: str
    source_relpath: str
    deal_id: int | None
    deal_title: str | None
    venue_name: str | None
    place: str | None
    region: str | None
    confidence: float | None
    match_reason: str
    website_webp_path: str
    website_webp_exists: bool
    website_webp_bytes: int | None
    gbp_jpg_path: str
    gbp_jpg_exists: bool
    gbp_jpg_bytes: int | None
    gbp_png_path: str
    gbp_png_exists: bool
    gbp_png_bytes: int | None

    def to_dict(self) -> dict[str, object]:
        return asdict(self)
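
    # Illustrative serialized row (all values hypothetical; keys are emitted
    # in sorted order by _write_jsonl_atomic):
    #   {"confidence": 0.97, "deal_id": 42, ..., "match_reason": "matched",
    #    "photo_sha256": "9f2c...", "source_relpath": "2024/img_001.jpg",
    #    "website_webp_exists": true, ...}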


@dataclass(frozen=True)
class RunSummary:
    """Aggregate counts and artifact locations for a single audit run."""

    run_id: str
    inbox_dir: Path
    out_dir: Path
    dry_run: bool
    photos_found: int
    matched: int
    website_present: int
    gbp_present: int
    errors: int
    audit_dir: Path

    def to_human_summary(self) -> str:
        mode = "DRY-RUN" if self.dry_run else "RUN"
        return (
            f"{mode} run summary\n"
            f"- run_id: {self.run_id}\n"
            f"- inbox: {self.inbox_dir}\n"
            f"- out: {self.out_dir}\n"
            f"- photos_found: {self.photos_found}\n"
            f"- matched: {self.matched}\n"
            f"- website_present: {self.website_present}\n"
            f"- gbp_present: {self.gbp_present}\n"
            f"- errors: {self.errors}\n"
            f"- audit_dir: {self.audit_dir}"
        )

    def to_dict(self) -> dict[str, object]:
        payload = asdict(self)
        payload["inbox_dir"] = str(self.inbox_dir)
        payload["out_dir"] = str(self.out_dir)
        payload["audit_dir"] = str(self.audit_dir)
        return payload
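
    # Illustrative summary.json payload (values hypothetical; keys are
    # emitted in sorted order by _write_json_atomic):
    #   {"audit_dir": "out/audit/r-20240101", "dry_run": false, "errors": 0,
    #    "gbp_present": 10, "inbox_dir": "inbox", "matched": 11,
    #    "out_dir": "out", "photos_found": 12, "run_id": "r-20240101",
    #    "website_present": 12}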


def run_audit(
    *,
    inbox_dir: Path,
    out_dir: Path,
    run_id: str | None,
    enable_match: bool,
    dry_run: bool,
) -> RunSummary:
    """Ingest the inbox, build one AuditRow per photo, and summarize the run.

    Unless dry_run is set, summary.json and audit.jsonl are written under
    out_dir/audit/<run_id>.
    """
    validate_inbox_dir(inbox_dir)
    run_id_final = _normalize_run_id(run_id) or new_run_id()

    ingest_inbox(inbox_dir=inbox_dir, out_dir=out_dir, dry_run=dry_run)
    rows, errors = _build_rows(inbox_dir=inbox_dir, out_dir=out_dir, enable_match=enable_match)
    summary = _build_summary(run_id_final, inbox_dir, out_dir, dry_run, rows, errors)

    if not dry_run:
        _write_audit_artifacts(summary.audit_dir, summary=summary, rows=rows)

    return summary
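
# Minimal usage sketch (paths and flags are illustrative):
#
#     summary = run_audit(
#         inbox_dir=Path("inbox"),
#         out_dir=Path("out"),
#         run_id=None,          # a fresh id comes from new_run_id()
#         enable_match=True,
#         dry_run=True,         # report only; nothing written under out/audit
#     )
#     print(summary.to_human_summary())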


def _build_rows(
    *, inbox_dir: Path, out_dir: Path, enable_match: bool
) -> tuple[list[AuditRow], int]:
    match_data = _maybe_match(inbox_dir=inbox_dir, enable_match=enable_match)

    errors = 0
    rows: list[AuditRow] = []
    for src in iter_image_files(inbox_dir):
        row, had_error = _build_row(
            src=src, inbox_dir=inbox_dir, out_dir=out_dir, match=match_data.get(str(src))
        )
        rows.append(row)
        errors += int(had_error)

    return (rows, errors)


def _build_row(
    *, src: Path, inbox_dir: Path, out_dir: Path, match: MatchInfo | None
) -> tuple[AuditRow, bool]:
    digest = sha256_file(src)
    rel = _safe_relpath_posix(src, inbox_dir)
    m = _match_fields(match)

    website = website_dest_path(out_dir=out_dir, inbox_dir=inbox_dir, src=src)
    gbp_jpg, gbp_png = gbp_dest_paths(out_dir=out_dir, inbox_dir=inbox_dir, src=src)
    w_exists, w_bytes = _file_stats(website)
    j_exists, j_bytes = _file_stats(gbp_jpg)
    p_exists, p_bytes = _file_stats(gbp_png)

    row = AuditRow(
        photo_sha256=digest,
        source_path=str(src),
        source_relpath=rel,
        deal_id=m.deal_id,
        deal_title=m.deal_title,
        venue_name=m.venue_name,
        place=m.place,
        region=m.region,
        confidence=m.confidence,
        match_reason=m.reason,
        website_webp_path=str(website),
        website_webp_exists=w_exists,
        website_webp_bytes=w_bytes,
        gbp_jpg_path=str(gbp_jpg),
        gbp_jpg_exists=j_exists,
        gbp_jpg_bytes=j_bytes,
        gbp_png_path=str(gbp_png),
        gbp_png_exists=p_exists,
        gbp_png_bytes=p_bytes,
    )
    # The matcher flags per-photo failures with an "error:" reason prefix.
    had_error = m.reason.startswith("error:")
    return (row, had_error)


@dataclass(frozen=True)
class _MatchFields:
    deal_id: int | None
    deal_title: str | None
    venue_name: str | None
    place: str | None
    region: str | None
    confidence: float | None
    reason: str


def _match_fields(match: MatchInfo | None) -> _MatchFields:
    if match is None:
        # Matching was disabled (or produced no entry for this photo).
        return _MatchFields(
            deal_id=None,
            deal_title=None,
            venue_name=None,
            place=None,
            region=None,
            confidence=None,
            reason="match_disabled",
        )
    return _MatchFields(
        deal_id=match.deal_id,
        deal_title=match.deal_title,
        venue_name=match.venue_name,
        place=match.place,
        region=match.region,
        confidence=match.confidence,
        reason=match.match_reason,
    )


def _maybe_match(*, inbox_dir: Path, enable_match: bool) -> dict[str, MatchInfo]:
    return match_inbox_photos(inbox_dir) if enable_match else {}
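
# Note: match_inbox_photos keys its results by the stringified source path,
# which is why _build_rows looks entries up with match_data.get(str(src)).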


def _build_summary(
    run_id: str,
    inbox_dir: Path,
    out_dir: Path,
    dry_run: bool,
    rows: list[AuditRow],
    errors: int,
) -> RunSummary:
    matched = sum(1 for r in rows if r.deal_id is not None)
    website_present = sum(1 for r in rows if r.website_webp_exists)
    gbp_present = sum(1 for r in rows if r.gbp_jpg_exists and r.gbp_png_exists)
    audit_dir = out_dir / "audit" / run_id
    return RunSummary(
        run_id=run_id,
        inbox_dir=inbox_dir,
        out_dir=out_dir,
        dry_run=dry_run,
        photos_found=len(rows),
        matched=matched,
        website_present=website_present,
        gbp_present=gbp_present,
        errors=errors,
        audit_dir=audit_dir,
    )


def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of a file, streamed in 1 MiB chunks."""
    h = hashlib.sha256()
    try:
        with path.open("rb") as f:
            while True:
                chunk = f.read(1024 * 1024)
                if not chunk:
                    break
                h.update(chunk)
    except OSError as exc:
        raise UserError(f"Failed to read file for hashing: {path} ({exc})") from exc
    return h.hexdigest()
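
# On Python 3.11+ the chunked loop above could be collapsed to
# hashlib.file_digest(f, "sha256"), which streams the file the same way.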


def _safe_relpath_posix(path: Path, root: Path) -> str:
    """POSIX-style path of `path` relative to `root`, falling back to
    os.path.relpath when the path is not a direct descendant of root."""
    try:
        rel = path.relative_to(root)
    except ValueError:
        rel = Path(os.path.relpath(path, root))
    return rel.as_posix()


def _normalize_run_id(raw: str | None) -> str | None:
    # Treat None, empty, and whitespace-only run ids as absent.
    value = (raw or "").strip()
    return value or None


def _file_stats(path: Path) -> tuple[bool, int | None]:
    # A single stat() call avoids a race between a separate exists() check
    # and the size lookup.
    try:
        return (True, int(path.stat().st_size))
    except FileNotFoundError:
        return (False, None)
    except OSError:
        # Unreadable metadata (e.g. permissions): present, size unknown.
        return (True, None)


def _write_audit_artifacts(audit_dir: Path, *, summary: RunSummary, rows: list[AuditRow]) -> None:
    audit_dir.mkdir(parents=True, exist_ok=True)
    _write_json_atomic(audit_dir / "summary.json", summary.to_dict())
    _write_jsonl_atomic(audit_dir / "audit.jsonl", [r.to_dict() for r in rows])
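
# Resulting on-disk layout for a run:
#   <out_dir>/audit/<run_id>/summary.json  - run-level counts (RunSummary)
#   <out_dir>/audit/<run_id>/audit.jsonl   - one JSON object per photo (AuditRow)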


def _write_json_atomic(path: Path, payload: dict[str, object]) -> None:
    # sort_keys and ensure_ascii make the serialization deterministic.
    text = json.dumps(payload, indent=2, sort_keys=True, ensure_ascii=True) + "\n"
    _write_text_atomic(path, text)


def _write_jsonl_atomic(path: Path, rows: list[dict[str, object]]) -> None:
    text = "".join([json.dumps(r, sort_keys=True, ensure_ascii=True) + "\n" for r in rows])
    _write_text_atomic(path, text)


def _write_text_atomic(path: Path, text: str) -> None:
    # Write to a sibling temp file, then atomically replace the target so a
    # failed write never leaves a partially written artifact behind.
    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        tmp.write_text(text, encoding="utf-8", newline="\n")
        tmp.replace(path)
    except OSError as exc:
        with suppress(OSError):  # best-effort cleanup of the temp file
            tmp.unlink()
        raise UserError(f"Failed to write file: {path} ({exc})") from exc
