import os
import tempfile
from pathlib import Path
from typing import List, Dict, Any

from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import uvicorn
from dotenv import load_dotenv
load_dotenv()

from ocr.preproc import preprocess
from ocr.engines import OCREngine
from ocr.pdf_helpers import process_pdf_pages
from utils.classify import classify_text
from extractors import extract_aadhaar, extract_voter, extract_land, ExtractResult

# --- Runtime configuration (read once at import time, after load_dotenv()) ---

# Preferred OCR backend name; falls back to "tesseract" when the variable is unset.
OCR_ENGINE = os.getenv("OCR_ENGINE", "tesseract")
# Language setting passed to the OCR engine as `lang`; defaults to "multi".
PADDLE_LANG = os.getenv("PADDLE_LANG", "multi")
# Optional Tesseract command; `or None` turns an empty string into None as well.
TESSERACT_CMD = os.getenv("TESSERACT_CMD") or None
# Uploads are staged in a dedicated folder under the system temp directory.
TMP_DIR = Path(tempfile.gettempdir()) / "id_ocr_uploads"
# Created eagerly at import so request handlers can write into it directly.
TMP_DIR.mkdir(parents=True, exist_ok=True)

app = FastAPI(title="ID OCR Service", version="1.0")
# One module-level engine instance, used by _process_image_file for image uploads.
engine = OCREngine(prefer=OCR_ENGINE, lang=PADDLE_LANG, tesseract_cmd=TESSERACT_CMD)

def _process_image_file(path: Path) -> Dict[str, Any]:
    """Preprocess a single image file and run the shared OCR engine on it.

    Args:
        path: Location of the image on disk.

    Returns:
        A dict with three keys: ``text`` and ``engine`` taken from the OCR
        result object, and ``source`` holding ``path`` as a string.
    """
    # Debug output is disabled, but preprocess() is still given a directory.
    debug_location = Path("data/output/debug_images")
    rgb_image, bw_image, rgb_array, gray_array = preprocess(
        path,
        save_debug=False,
        debug_dir=debug_location,
        idx=0,
    )
    # All four preprocessed variants are handed to the engine.
    recognised = engine.run(
        pil_rgb=rgb_image,
        pil_bw=bw_image,
        np_rgb=rgb_array,
        np_gray=gray_array,
    )
    return {
        "text": recognised.text,
        "engine": recognised.engine,
        "source": str(path),
    }

def _classify_and_extract(text: str) -> Dict[str, Any]:
    """Classify OCR text and run the extractor matching its document type.

    Args:
        text: OCR output for one image or one PDF page.

    Returns:
        A dict with ``doc_type``, ``confidence`` and ``fields`` copied from
        the extractor's result. When the classifier label has no extractor,
        the result is of type ``"unknown"`` with confidence ``0.0`` and the
        first 200 characters of ``text`` under ``fields["raw_text"]``.
    """
    label = classify_text(text)
    # Classifier label -> extractor responsible for that document type.
    extractors = {
        "aadhaar": extract_aadhaar,
        "voter": extract_voter,
        "land": extract_land,
    }
    extractor = extractors.get(label)
    if extractor is None:
        outcome = ExtractResult("unknown", {"raw_text": text[:200]}, 0.0)
    else:
        outcome = extractor(text)
    return {
        "doc_type": outcome.doc_type,
        "confidence": outcome.confidence,
        "fields": outcome.fields,
    }

# Extractor doc_type -> key of the response bucket that collects it.
_RESULT_BUCKETS = {"voter": "voter_id", "aadhaar": "aadhaar", "land": "land"}


def _bucket_for(doc_type: Any) -> str:
    """Return the response key for ``doc_type``; unmapped types go to "unknown"."""
    return _RESULT_BUCKETS.get(doc_type, "unknown")


def _remove_quietly(path: Path) -> None:
    """Delete a staged upload, ignoring filesystem errors (cleanup is best-effort)."""
    try:
        path.unlink()
    except OSError:
        pass


@app.post("/upload-multiple")
async def upload_multiple(files: List[UploadFile] = File(...)):
    """OCR, classify and extract fields from every uploaded image or PDF.

    Each upload is staged in a uniquely named temporary file under
    ``TMP_DIR``. PDFs (by extension, case-insensitive) are processed page by
    page; anything else is treated as a single image.

    Args:
        files: The uploaded files from the multipart request.

    Returns:
        A ``JSONResponse`` whose body has the keys ``voter_id``, ``aadhaar``,
        ``land``, ``agreement``, ``unknown`` and ``errors``. Each extraction
        result carries ``doc_type``, ``confidence``, ``fields``,
        ``source_file`` and ``page`` (image results also ``ocr_engine``).
        A failure on one file is recorded under ``errors`` and does not
        abort the remaining files. ``agreement`` is never populated by this
        handler; it is kept so the response shape stays unchanged.
    """
    results = {"voter_id": [], "aadhaar": [], "land": [], "agreement": [], "unknown": [], "errors": []}
    for upload in files:
        # The original name is only echoed back in the response.
        fname = upload.filename or "unnamed"
        # The client-supplied name is untrusted: joining it onto TMP_DIR would
        # allow "../" or absolute paths to escape the staging folder, and two
        # uploads with the same name would overwrite each other. Only the
        # extension is reused; mkstemp picks a unique file name.
        suffix = Path(fname).suffix
        is_pdf = suffix.lower() == ".pdf"
        tmp_path = None
        try:
            content = await upload.read()
            fd, staged_name = tempfile.mkstemp(dir=TMP_DIR, suffix=suffix)
            tmp_path = Path(staged_name)
            with os.fdopen(fd, "wb") as f:
                f.write(content)
        except Exception as e:
            results["errors"].append({"filename": fname, "error": f"failed to save upload: {e}"})
            # Do not leave a partially written file behind.
            if tmp_path is not None:
                _remove_quietly(tmp_path)
            continue
        # NOTE(review): the OCR calls below are synchronous inside an async
        # handler; consider offloading them to a worker thread if needed.
        try:
            if is_pdf:
                page_texts = process_pdf_pages(tmp_path, dpi=300, max_workers=2, prefer=OCR_ENGINE, lang=PADDLE_LANG, save_debug=False)
                for page_idx, page_text in enumerate(page_texts, start=1):
                    out = _classify_and_extract(page_text)
                    out["source_file"] = fname
                    out["page"] = page_idx
                    results[_bucket_for(out["doc_type"])].append(out)
            else:
                ocr_info = _process_image_file(tmp_path)
                out = _classify_and_extract(ocr_info["text"])
                out["source_file"] = fname
                out["ocr_engine"] = ocr_info.get("engine")
                out["page"] = 1
                results[_bucket_for(out["doc_type"])].append(out)
        except Exception as e:
            # Per-file boundary: report the failure and move on to the next file.
            results["errors"].append({"filename": fname, "error": str(e)})
        finally:
            _remove_quietly(tmp_path)
    return JSONResponse(content=results)

@app.get("/health")
def health():
    """Report that the service is up, together with the configured OCR settings."""
    return dict(
        status="ok",
        ocr_engine=OCR_ENGINE,
        paddle_lang=PADDLE_LANG,
    )

# Entry point used only when this file is executed directly as a script.
if __name__ == "__main__":
    # "app:app" names the `app` object inside a module called `app` —
    # presumably this file is app.py; confirm before renaming it.
    # NOTE(review): listens on all interfaces with auto-reload enabled, which
    # looks intended for local development — confirm for production use.
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
