
    FSir                     H   S SK JrJrJr  S SKJr  S SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKJr  S SKJr  S SKr\R"                  " \R$                  S9  \R&                  " \5      rSrSr\" S	S
9r " S S\5      r\R7                  SS5      rS\S\4S jrS\S-  S\4S jrS\S\\   4S jr \RB                  " S\RD                  5      r#S\S\\   4S jr$S\S\\   4S jr%S\S\S\4S jr&\RO                  S5      S\4S j5       r(\RO                  S5      S \4S! j5       r)g)"    )FastAPIHTTPExceptionRequest)	BaseModelN)convert_from_pathJSONResponse)levelzben+engi  zLand PDF Search API)titlec                   F    \ rS rSr% \\   \S'   \\   \S'   \\   \S'   Srg)SearchRequesti  pdf_urlsJL_NoDaag_No N)__name__
__module____qualname____firstlineno__liststr__annotations____static_attributes__r       /var/www/html/land-ocr/main.pyr   r     s     3i9#Yr   r   u   ০১২৩৪৫৬৭৮৯
0123456789textreturnc                 |    U R                  [        5      n [        R                  " SSU 5      R	                  5       n U $ )Nz\s+ )	translateBN_TO_ENresubstripr   s    r   normalize_textr'     s0    >>(#D66&#t$**,DKr   cellc                 (    U (       d  g[        U 5      $ )N )r'   )r(   s    r   
clean_cellr+     s    $r   c                     [        [        R                  " SU [        R                  5      5      nU(       a  U$ [        [        R                  " SU 5      5      $ )NuK   (?:জে\.?\s*এল\.?\s*নং|J\.?\s*L\.?\s*No\.?)\s*[:\-]?\s*(\d{2,4})z\b\d{2,4}\b)setr#   findall
IGNORECASE)r   labeleds     r   extract_jl_numbersr1      sD    "**V
 G
 rzz.$/00r   z\b(\d{2,4})\s+[\u0980-\u09FF]pdf_pathc                 
   [        5       n[        R                  " U 5       nUR                   H  nSSSSS.nUR	                  U5       HR  nU HI  nU(       d  M  [        US   5      n[        R                  " SU5      (       d  M8  UR                  U5        MK     MT     [        UR                  5       =(       d    S5      nUR                  [        R                  U5      5        M     S S S 5        U$ ! , (       d  f       U$ = f)Nlines   )vertical_strategyhorizontal_strategysnap_tolerancejoin_tolerancer   z\d{2,4}r*   )r-   
pdfplumberopenpagesextract_tablesr+   r#   	fullmatchaddr'   extract_textupdateDAAG_LINE_PATTERNr.   )	r2   resultspdfpagetable_settingstablerow	daag_cellr   s	            r   extract_daag_from_pdf_textrJ   6  s    eG		"cIID &-'."#"#	N ,,^< C  *3q6 2I||J	::I. ! = "$"3"3"5";<DNN,44T:;%  
#* N+ 
#	"* Ns   A!C3A#C33
Dc                     [        U 5      n [        R                  " SSU 5      n [        [        R                  U 5      5      $ )Nu   দা\s*গu	   দাগ)r'   r#   r$   r-   rB   r.   r&   s    r   extract_daag_numbers_ocrrL   P  s5    $D66/;5D ((.//r   urlfolderc                 N   [         R                  R                  U[         R                  R                  U 5      5      n[        R
                  " U SS9nUR                  5         [        US5       nUR                  UR                  5        S S S 5        U$ ! , (       d  f       U$ = f)N   )timeoutwb)
ospathjoinbasenamerequestsgetraise_for_statusr;   writecontent)rM   rN   rT   rfs        r   download_pdfr^   V  su    77<< 0 0 56DS"%A	dD	Q			 
K 
	Ks   /B
B$z/extractpayloadc           	         [        U R                  5      n[        U R                  5      n[        5       n[        5       n/ n[        R                  " 5        nU R
                   H  n [        Xv5      nSn [        U[        S9nU H.  n[        R                  " U[        SS9nUS[!        U5      -   -  nM0     UR#                  U[%        U5      -  5         ['        U5      nUR#                  X/-  5        X$-
  (       d  M  U(       d  M  UR#                  U[)        U5      -  5        M     S S S 5        [+        U5      [+        U5      S
.[+        X-
  5      [+        X$-
  5      S
.US.nUS   S   (       d  US   S   (       d  U$ [-        SUS9e! [         a:  n	SU SU	 3n
[        R                  U
5        UR                  U
5         S n	A	GMf  S n	A	ff = f! [         a9  n	SU SU	 3n
[        R                  U
5        UR                  U
5         S n	A	GN;S n	A	ff = f! [         a9  n	S	U SU	 3n
[        R                  U
5        UR                  U
5         S n	A	GNbS n	A	ff = f! , (       d  f       GN<= f)NzFailed to download z: r*   )dpiz--psm 6)langconfigr    zOCR failed for z!pdfplumber extraction failed for r   r   )found	not_founderrorsrf   r   r   i  )status_codedetail)r-   r   r   tempfileTemporaryDirectoryr   r^   	Exceptionloggererrorappendr   DPIpytesseractimage_to_stringOCR_LANGr'   rA   r1   rJ   rL   sortedr   )r_   req_jlreq_daagfound_jl
found_daagrg   tmpdirrM   r2   emsgocr_textimagesimgpage_ocrdaag_from_pdfresponses                    r   extract_land_datar   _  sH   F7??#HuHJF		$	$	&&##C'4 H#*8=!C*::(9 H nX&> >>H	 " );H)E EF# :8 D!!(":; %%88!!(-Eh-O"OPM $ 
'V H%j)

 F-.h34
 
H K )(;2G	2R
C
99e  +C51#6S!c"	"  #'uBqc2S!c""#  #9#bDS!c""#A 
'	&s   I'F
2I5AGH3I>I!I

G.G	I	GI
H.H	IHI
I!.IIII
I,z/quick_checkrequestc                 8   #    [        S/ SQ/ SQS./ / S.S.S9$ 7f)N   )63106125)182261946rd   )re   rf   )rh   r[   r   )r   s    r   quick_checkr     s4      .0
 	
 s   )*fastapir   r   r   pydanticr   rq   r#   rW   rj   r:   rS   	pdf2imager   fastapi.responsesr	   loggingbasicConfigINFO	getLoggerr   rm   rs   rp   appr   r   	maketransr"   r'   r+   r-   r1   compileUNICODErB   rJ   rL   r^   postr   r   r   r   r   <module>r      so  f 4 3   	    	 ' *    ',, '			8	$	)*I  ==9<H   S4Z  C  	1S 	1SX 	1  JJ$JJ  S 403 03s8 0c 3 3  *?:} ?: ?:D .w  r   