o
    *i+?                     @   s$  d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlZdZdZeddZG d	d
 d
e	ZeddZdedefddZdedee fddZdedee fddZdedee fddZdededefddZeddefddZed d!efd"d#ZdS )$    N)FastAPIHTTPException)	BaseModel)convert_from_pathzben+engi,  zLand PDF Search API)titlec                   @   s2   e Zd ZU ee ed< ee ed< ee ed< dS )SearchRequestpdf_urlsJL_NoDaag_NoN)__name__
__module____qualname__liststr__annotations__ r   r   "/var/www/html/land-ocr/main_dem.pyr     s   
 r   u   ০১২৩৪৫৬৭৮৯
0123456789textreturnc                 C   s   |  t} tdd| } | S )N\s+ )	translateBN_TO_ENresubr   r   r   r   normalize_text  s   
r   c                 C   s   t td| S )N\b\d{2,4}\b)setr   findallr   r   r   r   extract_jl_numbers  s   r!   pdf_pathc                 C   s|   t  }t| ,}|jD ]}| pd}|t}tdd|}t	d|}|
| qW d   |S 1 s7w   Y  |S )u`   
    Extract Daag numbers from PDF text-layer like:
    '261 সৈ য়ম 2.17 Click Here'
     r   r   z)\b(\d{2,4})\s+[^0-9]+?\s+\d+\.\d+\s+ClickN)r   
pdfplumberopenpagesextract_textr   r   r   r   r    update)r"   resultspdfpager   matchesr   r   r   extract_daag_from_pdf_text  s    


r-   c                 C   sJ   t  }| t} |  D ]}d|v r"td|}|r"||d q|S )uR   
    OCR fallback for Daag rows like:
    '261 সৈ য়ম 2.17 Click Here'
    Clickr   r   )r   r   r   
splitlinesr   searchaddgroup)r   r)   linematchr   r   r   extract_daag_numbers  s   
r5   urlfolderc                 C   sh   t j|t j| }tj| dd}|  t|d}||j	 W d    |S 1 s-w   Y  |S )N   )timeoutwb)
ospathjoinbasenamerequestsgetraise_for_statusr%   writecontent)r6   r7   r<   rfr   r   r   download_pdf  s   
rF   z/extractpayloadc              
   C   st  t | j}t | j}t  }t  }t s}| jD ]g}z\t||}t|}|||@  t	|| }	t	|| }
|	s;|
rjt
|tddd}|D ]$}tj|tdd}t|}|	r^||t|@  |
ri||t|@  qE|| su|| suW  nW q ty   Y qw W d    n1 sw   Y  t|t|dt|| t|| dd}|d d s|d d	 s|S td
|d)N   )dpi
first_page	last_pagez--psm 6)langconfigr	   r
   found	not_foundrQ   r	   r
   i  )status_codedetail)r   r	   r
   tempfileTemporaryDirectoryr   rF   r-   r(   boolr   DPIpytesseractimage_to_stringOCR_LANGr   r!   r5   	Exceptionsortedr   )rG   req_jlreq_daagfound_jl
found_daagtmpdirr6   r"   daag_from_textneed_jl_ocrneed_daag_ocrimagesimgr   responser   r   r   extract_land_data  sb   




-

rh   z/quick_checkrequestc                    s,   t dddgddgddgdgdd	d
S )N   63124261371rN   999888rO   )rR   rC   )JSONResponse)ri   r   r   r   quick_checkR  s   rr   )r;   r   rT   r?   r$   fastapir   r   pydanticr   	pdf2imager   rX   rZ   rW   appr   r   	maketransr   r   r   r!   r-   r5   rF   postrh   Requestrr   r   r   r   r   <module>   s4      (
E