
    9i'                        S SK r S SKJr  S SKrS SKJrJr  S SKr	S SK
rS SKJr  S SKJr  S SKJr  S SKJr  S SKJr  S SKJrJr  S S	KJrJr  S S
KJr  \R<                  S:  a'  \R>                  R@                  r\RB                  " 5         \" 5       r"S/r#\RH                  " \RJ                  \RL                  S9 " S S\5      5       r'g)    N)AnyDict)	Pipelines)Model)
OutputKeys)Pipeline)	PIPELINES)ConfigConfigFields)	ModelFileTasks)
get_loggerz2.0LanguageIdentificationPipeline)module_namec                      ^  \ rS rSrSrS\4U 4S jjrS\S\4S jrS\S\	\\
4   4S jrS\	\\
4   S\	\\
4   4S	 jrS
\	\\
4   S\	\\
4   4S jrSrU =r$ )r      u
  Language Identification Pipeline.

Examples:

>>> from modelscope.pipelines import pipeline
>>> from modelscope.utils.constant import Tasks

>>> pipeline_ins = pipeline(Tasks.text_classification, 'damo/nlp_language_identification-classification-base')
>>> pipeline_ins('Elon Musk, co-founder and chief executive officer of Tesla Motors.\n' \
>>>              'Gleichzeitig nahm die Legion an der Befriedung Algeriens teil, die von.\n' \
>>>              '使用pipeline推理及在线体验功能的时候，尽量输入单句文本，如果是多句长文本建议人工分句。'

>>> {
>>>    "labels":[
>>>        "en",
>>>        "de",
>>>        "zh"
>>>    ],
>>>    "scores":[
>>>        [('en', 0.99)],
>>>        [('de', 1.0)],
>>>        [('zh', 1.0)]
>>>    ]
>>> }
modelc           
      0  > [         TU ]  " SSU0UD6  UnSU l        [        R                  " [
        R                  R                  U[        R                  5      5      U l
        [
        R                  R                  X0R                  [        R                     S   5      n/ n/ n[        [        US5      5       HK  u  pxUR                  5       n UR!                  S5      nUR#                  X45        UR#                  Xx45        MM     [)        U5      U l        [)        U5      U l        U R*                  R/                  SS5      U l        U R*                  R/                  S	S
5      U l        [
        R                  R                  X0R                  [        R                     S   5      n	[)        [        [        U	SSS95       VVs/ s H  u  pxXxR                  5       4PM     snn5      U l        SU l        [8        R:                  " 5         [8        R<                  " SS9n
SU
R>                  l         [8        RB                  " U
S9U l"        [8        RF                  RH                  RK                  U RD                  [8        RF                  RL                  RN                  /U5        [8        RP                  " 5       nU R                  (       a;  URS                  5        H'  n['        URT                  URW                  5       5        M)     URY                  S5      U l-        URY                  S5      nURY                  S5      nUUS.U l.        [8        R^                  " 5       n[8        R`                  " 5       nU RD                  Rc                  UU/5        [8        RF                  RH                  RK                  U RD                  [8        RF                  RL                  RN                  /U5        g! [$         a"    U R                  (       a  ['        SX5         GMT  f = fs  snnf )z~Build a language identification pipeline with a model dir or a model id in the model hub.

Args:
    model: A Model instance.
r   Fvocabrbzutf-8zerror vocab:<UNK>   z</S>r   labelrutf8)encodingunkT)allow_soft_placement)configz	src_cid:0zoutput_label:0zpredict_score:0)
output_idsoutput_scoreN )2super__init__debugr
   	from_fileospathjoinr   CONFIGURATIONcfgr   preprocessor	enumerateopenstripdecodeappendUnicodeDecodeErrorprintdictr   vocab_reversegetunk_idpad_idr   	unk_labeltfreset_default_graphConfigProtogpu_optionsallow_growthSession_sessionsaved_modelloaderloadtag_constantsSERVINGget_default_graphget_operationsnamevaluesget_tensor_by_name	input_idsoutputglobal_variables_initializerlocal_variables_initializerrun)selfr   kwargs
export_dirjoint_vocab_file
vocabfilesvocabfiles_reverseiwjoint_label_file	tf_configdefault_graphopoutput_labelr!   init
local_init	__class__s                    x/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/pipelines/nlp/language_identification_pipline.pyr$   'LanguageIdentificationPipeline.__init__:   s&    	/u//

##GGLLY%<%<=? 77<<!:!:;GDF
d#3T:;DA	AHHW%!!1&)"))1&1 < *%
!"45jjnnWa0jjnnVQ/77<<!:!:;GDFi!387: ; 7:daAwwy> 7: ; <

 NN=	-1	*

)4
""4==$&NN$@$@$H$H#I#-	/ ,,.::#224bggryy{+ 5 '99+F$778HI$778IJ '(
 ..0335
4,-
""4==$&NN$@$@$H$H#I#-	/Q & ::.!/	;s   5O#P
#'PPinputreturnc                 P   UR                  5       nSn[        R                  " USU5      nSn[        R                  " USU5      nSn[        R                  " USU5      nS nSnSR                  U Vs/ s H  nX;  a  U" U5      OSPM     sn5      n[        R                  " S[        R
                  5      n	[        R                  " U	SU5      nSR                  UR                  5        V
s/ s HQ  n
[        [        R                  " S	U
5      5      (       a'  [        [        R                  " S
U
5      5      (       a  MO  U
PMS     sn
5      n/ nUR                  5        Hl  nU R                  R                  XR                  5      n[        U5      S:  a$  XR                  :X  a  US   U R                  :X  a  M[  UR                  U5        Mn     [        U5      S:  a  US   U R                  :X  a  USS  n[        U5      S:  a  US   U R                  :X  a  US S nU$ s  snf s  sn
f )Nz/<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6}); z\S+[./]\S+\s?z
\S*@\S*\s?c                     [        U 5      nSU:  d  US:  a  US-  nOUS:X  a  SnOUS;   a  SnOUS;   a  S	n[        U5      $ )
Ni   i_  i  i 0      )i0  i0  i   i   i   i   "   )i   i   i   i   '   )ordchr)ucharinside_codes     r`   stringpartQ2BELanguageIdentificationPipeline._lid_preprocess.<locals>.stringpartQ2B   s^    e*K#{V';v%&$ !  % @@${##    uV   ,-+"'\&.!=:;°·$«»|±[]{}_?<>~^*/%#@()，。！《》？、`Â …‼️ u`   [😀-🙏🌀-🗿🚀-🛿🇠-🇿🤦-🤷𐀀-􏿿✂-➰♀-♂☀-⭕‍⏏⏩⌚️〰]+z\dz^[a-z0-9+-_]+$r   r   )lowerresubr)   compileUNICODEsplitboolsearchmatchr/   r   r6   r7   lenr1   )rP   rb   sentenceCLEANRURLREEMAILRErn   m_noisyCharscEMOJIREitemoutidsrW   tmps                 r`   _lid_preprocess.LanguageIdentificationPipeline._lid_preprocess}   s   ;;=C66&"h/ 66%X.66'2x0	$ w77
 !" 5M!3>
  **
 JJ 66'2x088%^^-
-T5$/00BHH%6=> -
  !A**..KK0C6  KK/F2J$++4MMM# " v;?vayDKK7ABZFv;?vbzT[[8CR[FM
&
s   3H-AH#?H#c                    UR                  S5      nU Vs/ s H*  nUR                  5       S:w  d  M  U R                  U5      PM,     nnU R                  (       a  [	        X$5       Hp  u  p5[        SU5        [        SSR                  U Vs/ s H8  nU R                  R                  X`R                  5      R                  SS5      PM:     sn5      5        Mr     [        U Vs/ s H  n[        U5      PM     sn5      nU H.  nUR                  U R                  /U[        U5      -
  -  5        M0     [        R                   " U5      nSU0n	U	$ s  snf s  snf s  snf )N
re   zraw:zres:r   rq   rK   )rx   r/   r   r%   zipr3   r)   r5   r6   r7   replacemaxr|   extendr8   nparray)
rP   rb   
sentenceltr}   input_ids_ltrK   wididsmaxlenresults
             r`   
preprocess)LanguageIdentificationPipeline.preprocess   s<   [[&
;E
;Ex~~2% +D  *: 	 
 ::'*:'D#fh'BGG5>%5>c **..sKK@HH#S*5>%  (E ,7,3c#h,78CJJ}S(9:;  HH\*	y))
% 8s   EE?E"Ec                     U R                   R                  5          U R                  US   0nU R                   R                  U R                  US9nUsS S S 5        $ ! , (       d  f       g = f)NrK   )	feed_dict)r@   
as_defaultrK   rO   rL   )rP   rb   r   sess_outputss       r`   forward&LanguageIdentificationPipeline.forward   sQ    ]]%%'{);<I==,,T[[I,NL (''s   7A
A*inputsc                 @   US   n[        / SQ5      n/ n/ nU H  n/ n[        X`R                  R                  5       5       H  u  pX;  a  M  UR	                  X45        M      [        US SS9S S n[        U5      S:X  a  S/nUR	                  U5        UR	                  US   S   5        M     U V
VVs/ s H.  n
U
 VVs/ s H  u  pUS	:  d  M  U[        US
5      4PM     snnPM0     nnn
n[        R                  U[        R                  U0nU$ s  snnf s  snnn
f )Nr!   )hafamarazbebgbnbscacecocscydadeeleneoeseteufafifrfygagdglguhahawhehihmnhrhthuhyidigisitjajvkakkkmknkokukylaloltlvmgmimkmlmnmrmsmtmynenlnonypaplpsptrorusdsiskslsmsnsosqsrstsusvswtatetgthtltrugukuruzvixhyiyozhzzh-twzuc                     U S   $ )Nr   r"   )rV   s    r`   <lambda><LanguageIdentificationPipeline.postprocess.<locals>.<lambda>   s    !rp   T)keyreverse   r   )r   g      ?g{Gz?   )setr   r   rI   r1   sortedr|   roundr   LABELSSCORES)rP   r   output_scores_rawsupported_104_langlabels_scores_ltoutput_labelsr!   tmpltsllabels_scoresr   scoreoutput_scoresr   s                  r`   postprocess*LanguageIdentificationPipeline.postprocess   s@   ">2  "
  -LEL***;*;*=>.aV$ ? 5ndCBQGE5zQ"##E*  q!- . /?@.>] /<M.;leut| 35%q/2.;M.> 	 @
 }}
 M @s   6DDD#DD)r@   r+   r%   rK   r   rL   r8   r7   r9   r   r5   )__name__
__module____qualname____firstlineno____doc__strr$   listr   r   r   r   r   r  __static_attributes____classcell__)r_   s   @r`   r   r      s    4A/c A/FBS BT BH S#X 0 T#s(^  S#X  $$sCx. $T#s(^ $ $rp   )(r'   os.pathr(   osprt   typingr   r   numpyr   
tensorflowr:   modelscope.metainfor   modelscope.models.baser   modelscope.outputsr   modelscope.pipelines.baser   modelscope.pipelines.builderr	   modelscope.utils.configr
   r   modelscope.utils.constantr   r   modelscope.utils.loggerr   __version__compatv1disable_eager_executionlogger__all__register_moduletext_classificationlanguage_identificationr   r"   rp   r`   <module>r?     s    
  	    ) ( ) . 2 8 6 .>>U	B 	+
, 	9+L+LNdX dNdrp   