import re
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, Literal, Optional, TypedDict, Union

from .. import constants
from ..file_download import repo_folder_name
from .sha import git_hash, sha_fileobj


if TYPE_CHECKING:
    from ..hf_api import RepoFile, RepoFolder

# Matches a full 40-character lowercase hex commit hash; checked with `fullmatch`,
# so no `^`/`$` anchors are needed.
_REGEX_COMMIT_HASH = re.compile(r"[0-9a-f]{40}")


class Mismatch(TypedDict):
    """Typed structure describing a checksum mismatch."""

    path: str
    expected: str
    actual: str
    algorithm: str


# "sha256" for LFS-tracked files, "git-sha1" (git blob SHA-1) for regular files
HashAlgo = Literal["sha256", "git-sha1"]


@dataclass(frozen=True)
class FolderVerification:
    """Outcome of verifying a local folder against the remote file listing."""

    revision: str  # commit hash (or revision name) that was verified
    checked_count: int  # number of paths hashed on both sides
    mismatches: list[Mismatch]  # checksum disagreements
    missing_paths: list[str]  # present remotely, absent locally
    extra_paths: list[str]  # present locally, absent remotely
    verified_path: Path  # local root that was scanned


def collect_local_files(root: Path) -> dict[str, Path]:
    """
    Return a mapping of repo-relative path -> absolute path for all files under `root`.
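
    Example (illustrative; results depend on the directory contents):

        >>> files = collect_local_files(Path("/path/to/snapshot"))  # doctest: +SKIP
        >>> sorted(files)  # doctest: +SKIP
        ['config.json', 'model.safetensors']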
    """
    return {p.relative_to(root).as_posix(): p for p in root.rglob("*") if p.is_file()}


def _resolve_commit_hash_from_cache(storage_folder: Path, revision: Optional[str]) -> str:
    """
    Resolve a commit hash from a cache repo folder and an optional revision.

    Resolution order: a 40-character hex `revision` is returned as-is; a named
    `revision` is read from `refs/<revision>`; with no revision, `refs/main` is
    tried, then a lone snapshot directory is accepted.
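
    Expected cache layout (illustrative):

        models--user--repo/
        ├── refs/
        │   └── main            # text file containing a commit hash
        └── snapshots/
            └── <commit hash>/  # snapshot contents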
    """
    if revision and _REGEX_COMMIT_HASH.fullmatch(revision):
        return revision

    refs_dir = storage_folder / "refs"
    snapshots_dir = storage_folder / "snapshots"

    if revision:
        ref_path = refs_dir / revision
        if ref_path.is_file():
            return ref_path.read_text(encoding="utf-8").strip()
        raise ValueError(f"Revision '{revision}' could not be resolved in cache (expected file '{ref_path}').")

    # No revision provided: try common defaults
    main_ref = refs_dir / "main"
    if main_ref.is_file():
        return main_ref.read_text(encoding="utf-8").strip()

    if not snapshots_dir.is_dir():
        raise ValueError(f"Cache repo is missing snapshots directory: {snapshots_dir}. Provide --revision explicitly.")

    candidates = [p.name for p in snapshots_dir.iterdir() if p.is_dir() and _REGEX_COMMIT_HASH.fullmatch(p.name)]
    if len(candidates) == 1:
        return candidates[0]
    if not candidates:
        raise ValueError(f"No cached snapshots found in: {snapshots_dir}. Please pass --revision.")

    raise ValueError(
        "Ambiguous cached revision: multiple snapshots found and no refs to disambiguate. Please pass --revision."
    )


def compute_file_hash(path: Path, algorithm: HashAlgo) -> str:
    """
    Compute the checksum of a local file using the requested algorithm.
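
    Example (illustrative, assuming `empty.bin` is a zero-byte file):

        >>> compute_file_hash(Path("empty.bin"), "sha256")  # doctest: +SKIP
        'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
        >>> compute_file_hash(Path("empty.bin"), "git-sha1")  # doctest: +SKIP
        'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'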
    """

    with path.open("rb") as stream:
        if algorithm == "sha256":
            return sha_fileobj(stream).hex()
        if algorithm == "git-sha1":
            return git_hash(stream.read())
        raise ValueError(f"Unsupported hash algorithm: {algorithm}")


def verify_maps(
    *,
    remote_by_path: dict[str, Union["RepoFile", "RepoFolder"]],
    local_by_path: dict[str, Path],
    revision: str,
    verified_path: Path,
) -> FolderVerification:
    """Compare remote entries and local files and return a verification result."""
    remote_paths = set(remote_by_path)
    local_paths = set(local_by_path)

    missing = sorted(remote_paths - local_paths)
    extra = sorted(local_paths - remote_paths)
    both = sorted(remote_paths & local_paths)

    mismatches: list[Mismatch] = []

    for rel_path in both:
        remote_entry = remote_by_path[rel_path]
        local_path = local_by_path[rel_path]

        # LFS-tracked files expose a sha256; depending on how the entry was built,
        # `lfs` may be an object with attributes or a plain dict.
        lfs = getattr(remote_entry, "lfs", None)
        lfs_sha = getattr(lfs, "sha256", None) if lfs is not None else None
        if lfs_sha is None and isinstance(lfs, dict):
            lfs_sha = lfs.get("sha256")
        if lfs_sha:
            algorithm: HashAlgo = "sha256"
            expected = str(lfs_sha).lower()
        else:
            # Regular (non-LFS) files are identified by their git blob id (SHA-1).
            blob_id = remote_entry.blob_id  # type: ignore[union-attr]
            algorithm = "git-sha1"
            expected = str(blob_id).lower()

        actual = compute_file_hash(local_path, algorithm)

        if actual != expected:
            mismatches.append(Mismatch(path=rel_path, expected=expected, actual=actual, algorithm=algorithm))

    return FolderVerification(
        revision=revision,
        checked_count=len(both),
        mismatches=mismatches,
        missing_paths=missing,
        extra_paths=extra,
        verified_path=verified_path,
    )


def resolve_local_root(
    *,
    repo_id: str,
    repo_type: str,
    revision: Optional[str],
    cache_dir: Optional[Path],
    local_dir: Optional[Path],
) -> tuple[Path, str]:
    """
    Resolve the root directory to scan locally and the remote revision to verify.

    If `local_dir` is given, it is used as-is (with `revision` defaulting to the
    library default); otherwise the cached snapshot for the resolved commit is used.
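
    Example (illustrative, with hypothetical arguments):

        >>> root, revision = resolve_local_root(
        ...     repo_id="user/repo",
        ...     repo_type="model",
        ...     revision=None,
        ...     cache_dir=None,
        ...     local_dir=None,
        ... )  # doctest: +SKIP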
    """
    if local_dir is not None:
        root = Path(local_dir).expanduser().resolve()
        if not root.is_dir():
            raise ValueError(f"Local directory does not exist or is not a directory: {root}")
        return root, (revision or constants.DEFAULT_REVISION)

    cache_root = Path(cache_dir or constants.HF_HUB_CACHE).expanduser().resolve()
    storage_folder = cache_root / repo_folder_name(repo_id=repo_id, repo_type=repo_type)
    if not storage_folder.exists():
        raise ValueError(
            f"Repo is not present in cache: {storage_folder}. Use 'hf download' first or pass --local-dir."
        )
    commit = _resolve_commit_hash_from_cache(storage_folder, revision)
    snapshot_dir = storage_folder / "snapshots" / commit
    if not snapshot_dir.is_dir():
        raise ValueError(f"Snapshot directory does not exist for revision '{commit}': {snapshot_dir}.")
    return snapshot_dir, commit
