
    9i                     R   S r SSKrSSKrSSKrSSKrSSKrSSKrSSKJrJ	r	J
r
JrJrJrJr  SSKrSSKJs  Jr  SSKrSSKJr  SSKJrJr  SSKJr  SSKJr  SSKJrJrJrJ r   SS	K!J"r"J#r#J$r$  SS
K%J&r&  SSK'J(r(J)r)J*r*  SSK+J,r,  SSK-J.r.J/r/J0r0  SSK1J2r2  SSK3J4r5  SSK6J7r7  SSK8J9r9  SSK:J;r;  \Rx                  S:w  al  \Rz                  R}                  S5        \Rz                  R                  S5        \Rz                  R                  S5        \Rz                  R                  S5        \5R                  " 5       r4SrCSrDS/rE " S S\5      rFS rG " S S\R"                  R                  5      rI\R                  R                  S 5       rLS  rM " S! S"\R"                  R                  5      rNS# rO\R                  R                  S$ 5       rP   S8S% jrQ " S& S'\R"                  R                  5      rR " S( S)\R"                  R                  5      rS " S* S+\R"                  R                  5      rT " S, S-\R"                  R                  5      rU " S. S/\0\&5      rVS0rWS1rX\)" S2\W5       " S3 S4\V5      5       rY\.R                  " \7R                  \,R                  S59 " S6 S7\V5      5       r]g)9zPyTorch ChatGLM model.     N)AnyCallableDictListOptionalTupleUnion)nn)CrossEntropyLoss	LayerNorm)	skip_init)LogitsProcessor)GenerationConfigLogitsProcessorListModelOutputStoppingCriteriaList)BaseModelOutputWithPast)BaseModelOutputWithPastAndCrossAttentionsCausalLMOutputWithPast)PreTrainedModel)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward)Models)MODELSModel
TorchModel)
OutputKeys)logger)Tasks   )ChatGLMConfig)ChatGLMTokenizerdarwinFTzTHUDM/ChatGLM-6BChatGLM6BConfigzTHUDM/chatglm-6bc                   f    \ rS rSrS\R
                  S\R                  S\R                  4S jrSrg)InvalidScoreLogitsProcessor8   	input_idsscoresreturnc                     [         R                  " U5      R                  5       (       d)  [         R                  " U5      R                  5       (       a  UR	                  5         SUS'   U$ )Ng     j@).   )torchisnananyisinfzero_)selfr)   r*   s      m/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/chatglm/text_generation.py__call__$InvalidScoreLogitsProcessor.__call__:   sH    ;;v""$$F(;(?(?(A(ALLN F6N     N)	__name__
__module____qualname____firstlineno__r.   
LongTensorFloatTensorr5   __static_attributes__r8   r7   r4   r'   r'   8   s0    %"2"2 **/4/@/@r7   r'   c           	          SSK nSSKnSSKn[        R                  R                  U5      n[        R                  SU 35        UR                  R                  U5      n/ n/ n	U H]  u  p[        R                  SU
 SU 35        UR                  R                  Xj5      nUR                  U
5        U	R                  U5        M_     [        X5       GH  u  pU
R                  S5      n
[!        S U
 5       5      (       a)  [        R                  S	SR#                  U
5       35        MW  U nU
 H  nUR$                  " S
U5      (       a  UR                  " SU5      nOU/nUS   S:X  d	  US   S:X  a  ['        US5      nOZUS   S:X  d	  US   S:X  a  ['        US5      nO;US   S:X  a  ['        US5      nO%US   S:X  a  ['        US5      nO ['        XS   5      n[+        U5      S:  d  M  [-        US   5      nUU   nM     WSS S:X  a  ['        US5      nOUS:X  a  UR/                  U5      n UR0                  UR0                  :X  d"   SUR0                   SUR0                   S35       e [        R                  SU
 35        [6        R8                  " U5      Ul        GM     U $ ! [         a    [        R                  S5        e f = f! [(         a,    [        R                  S	SR#                  U
5       35         GM  f = f! [2         a1  nU=R4                  UR0                  UR0                  4-  sl        e SnAff = f)z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape /c              3   ,   #    U  H
  nUS ;   v   M     g7f))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepNr8   ).0ns     r4   	<genexpr>0load_tf_weights_in_chatglm_6b.<locals>.<genexpr>_   s%       a  
 
 s   z	Skipping z[A-Za-z]+_\d+z_(\d+)kernelgammaweightoutput_biasbetabiasoutput_weightssquad
classifier   r!   i_embeddingszPointer shape z and array shape z mismatchedzInitialize PyTorch weight )renumpy
tensorflowImportErrorr   errorospathabspathinfotrainlist_variablesload_variableappendzipsplitr0   join	fullmatchgetattrAttributeErrorlenint	transposeshapeAssertionErrorargsr.   
from_numpydata)modelconfigtf_checkpoint_pathrW   nptftf_path	init_varsnamesarraysnamerm   arraypointerm_namescope_namesnumes                     r4   load_tf_weights_in_chatglm_6br   B   s   
 ggoo01G
KK8	BC''0IEF (l5'BC&&w5Te	 ! 5)zz#      KK)CHHTN#345F||,f55 hhy&9%h1~)[^w-F!'84Q=0KNf4L!'62Q#33!'84Q7*!'<8%g1~>G ;1$+a.)!#,+ , #$<=(gx0GxLL'E	,Y.?}KXY,
 	078''.[ *\ LA  Q	
 	Z & KK)CHHTN+; <=  	FFw}}ekk22F	s5   J% 
K	.<L%!K	1K?>K?
L=,L88L=c                   N   ^  \ rS rSrSrU 4S jrS\R                  4S jrSr	U =r
$ )PrefixEncoder   z
The torch.nn model to encode the prefix
Input shape: (batch-size, prefix-length)
Output shape: (batch-size, prefix-length, 2*layers*hidden)
c           	        > [         TU ]  5         UR                  U l        U R                  (       a  [        R                  R                  UR                  UR                  5      U l        [        R                  R                  [        R                  R                  UR                  UR                  5      [        R                  R                  5       [        R                  R                  UR                  UR                  UR                  -  S-  5      5      U l        g [        R                  R                  UR                  UR                  UR                  -  S-  5      U l        g )NrU   )super__init__prefix_projectionr.   r
   	Embeddingpre_seq_lenhidden_size	embedding
SequentialLinearTanh
num_layerstransr3   rs   	__class__s     r4   r   PrefixEncoder.__init__   s    !'!9!9!!"XX//0B0B060B0BDDN,, 2 2F4F4FG 2 2 & 1 1F4F4F F JLMDJ #XX//""F$5$58J8J$JQ$NPDNr7   prefixc                     U R                   (       a$  U R                  U5      nU R                  U5      nU$ U R                  U5      nU$ N)r   r   r   )r3   r   prefix_tokenspast_key_valuess       r4   forwardPrefixEncoder.forward   sE    !! NN62M"jj7O  #nnV4Or7   )r   r   r   )r9   r:   r;   r<   __doc__r   r.   Tensorr   r?   __classcell__r   s   @r4   r   r      s#    P ell  r7   r   c                 ^    SU -  S[         R                  " SU -  SSU -  U -  -   -  5      -   -  $ )zOpenAI's gelu implementation.      ?      ?gQ63E?gHm?)r.   tanhxs    r4   	gelu_implr      sF     7ejj+a/1q 002 3 	34 4r7   c                     [        U 5      $ r   )r   r   s    r4   gelur      s    Q<r7   c                   b   ^  \ rS rSrS\R
                  S4U 4S jjrS rS	S jrU 4S jr	Sr
U =r$ )
RotaryEmbedding   '  Fc                 l  > [         TU ]  5         SU[        R                  " SUS5      R	                  5       U-  -  -  nUR                  5       nX@l        U(       a,  [        R                  R                  U5      U l	        S U l
        O'U R                  SU5        S U l
        S U l        S U l        X0l        g )Nr   r   rU   inv_freq)r   r   r.   arangefloathalf	learnabler
   	Parameterr   max_seq_len_cachedregister_buffer
cos_cached
sin_cached	precision)r3   dimbaser   r   r   r   s         r4   r   RotaryEmbedding.__init__   s    QQ 7 = = ?# EFG==?"!HH..x8DM&*D#  X6&*D#"DO"DO"r7   c                     g r   r8   )r3   
state_dictr   local_metadatastrictmissing_keysunexpected_keys
error_msgss           r4   _load_from_state_dict%RotaryEmbedding._load_from_state_dict   s    r7   c                 j   Uc  UR                   U   nU R                  b  X0R                  :  Ga_  U R                  (       a  S OUU l        [        R                  " X1R
                  U R                  R                  S9n[        R                  " SX@R                  5      n[        R                  " XU4SS9R                  UR
                  5      nU R                  [        R                  :X  a  UR                  5       nUR                  5       S S 2S S S 24   nUR                  5       S S 2S S S 24   nU R                  [        R                  :X  a   UR                  5       nUR                  5       nU R                  (       a  Xx4$ XxsU l        U l        U R                   S U2S4   U R"                  S U2S4   4$ )N)devicedtypezi,j->ijr   .)rm   r   r   r.   r   r   r   r   einsumcattor   bfloat16r   cossinr   r   )	r3   r   seq_dimseq_lentfreqsembr   r   s	            r4   r   RotaryEmbedding.forward   sT   ?ggg&G""*111.2nnd'D#0C0CEALLA}}=E))UN366qxx@C~~/iik 1dA:.J1dA:.J~~/'002
'002
~~!--/9,DOT_xx}-txx}/MMMr7   c                    > U R                   b  U" U R                   5      U l         U R                  b  U" U R                  5      U l        [        TU ]  U5      $ r   )r   r   r   _apply)r3   fnr   s     r4   r   RotaryEmbedding._apply   sF    ??& 1DO??& 1DOw~b!!r7   )r   r   r   r   r   r   )r!   N)r9   r:   r;   r<   r.   r   r   r   r   r   r?   r   r   s   @r4   r   r      s(    !&%** #N2" "r7   r   c                     U SS U R                   S   S-  24   U SU R                   S   S-  S 24   p![        R                  " U* U4UR                  S-
  S9$ )N.r   rU   r!   r   )rm   r.   r   ndim)r   x1x2s      r4   rotate_halfr      se    s%QWWR[A%%%&#qwwr{a/?/@*@(A99
b	GGaK r7   c                     [         R                  " XBR                  S5      5      R                  S5      [         R                  " XCR                  S5      5      R                  S5      p2X-  [	        U 5      U-  -   X-  [	        U5      U-  -   pX4$ )Nr!   rU   )Fr   squeeze	unsqueezer   )qkr   r   position_ids        r4   apply_rotary_pos_emb_indexr      sx     {{;A7AA!D	KQ0::1= 
GA,-A0q4Kr7   c
                    Ub5  US   US   p[         R                  " X4SS9n[         R                  " X4SS9nUR                  u  ppU	(       a  X#4nOS n[        US-   5      nU(       a  U[        R
                  " U5      U-  -  nUR                  S5      UR                  S5      UR                  S5      UR                  S5      4nUR                  US   US   US   -  S5      nUR                  US   US   US   -  S5      n[         R                  " SSSUR                  UR                  S9n[         R                  " UUR                  SS5      UR                  SS5      R                  SS5      SS	S
9nUR                  " U6 nU R                  (       a2  UU R                  l        U R                  UUR                  5       5      nOqUS:H  R!                  5       (       d  UR#                  US5        UR                  nUR                  5       nUU-  n[$        R&                  " USS9nUR)                  U5      nUR                  S5      UR                  S5      UR                  S5      UR                  S5      4nUR                  UR                  S5      US   US   -  S5      nUR                  US   US   -  US   S5      n[         R*                  " UUR                  SS5      5      nUR                  " U6 nUR-                  SSSS5      R                  5       nUR                  5       S S U4-   nUR                  " U6 nUUU4nU$ )Nr   r!   r   rU   r      r   r   g        r   )rP   alpha     )r.   r   rm   r   mathsqrtsizeviewzerosr   r   baddbmmrl   scale_mask_softmaxscale
contiguousallmasked_fill_r   softmaxtypebmmpermute)r3   query_layer	key_layervalue_layerattention_maskhidden_size_per_partitionlayer_id
layer_pastscaling_attention_score	use_cachepast_key
past_valuer   bnhr   presentquery_key_layer_scaling_coeffoutput_sizematmul_resultattention_scoresattention_probsr   context_layernew_context_layer_shapeoutputss                             r4   attention_fnr     sz    )!}jm*IIx3;	ii 9qA #,//G*$)(Q,$7!!IIk"%BBD ##A&(8(8(;##A&	q(9;K "";q>#.q>KN#BBHK {1~{1~A/N!#I KK			!!M MMa#Aq!++Aq1M %));7(E%112B2@2K2K2MO !#((**)).(C &&+113+.KK))$4"=)..u5 ##A&(8(8(;##A&(8(8(;=K ""[^k!n<bBK &**;q>KN+J+6q>2?O IIo{/D/DQ/JKM "&&4M "))!Q15@@BM ,0023B7!;% %!&&(?@Mg7GNr7   c                      ^  \ rS rSrSS\R
                  S4U 4S jjr\S 5       r SS jr	   SS\R                  S\R                  S	\\\R                  \R                  4      S
\S\4
S jjrSrU =r$ )SelfAttentioniu  NTc                 T  > [         [        U ]  5         X0l        Xl        Xl        X l        X l        Xpl        [        U(       a  U R                  U R                  S-  -  OU R                  U R                  -  S[        R                  SS9U l        S U l        Uc
  X-  U l        OX@l        X R                  -  U l        [!        [        R"                  R$                  USU R                  -  UUS9U l        [!        [        R"                  R$                  U R                  UUUS9U l        g )NrU   r   F)r   r   r   r   rQ   r   )r   r  r   r  r   r  num_attention_heads!num_attention_heads_per_partitionposition_encoding_2dr   r.   r   
rotary_embr   hidden_size_per_attention_headinner_hidden_sizer   r
   r   query_key_valuedense)	r3   r   r  r  r  rQ   params_dtyper  r   s	           r4   r   SelfAttention.__init__w  s    	mT+- &)4&#6 1D.$8!).B %%)+ 8 88jj
 #')12=2TD/2P/!47Z7Z!Z  )HHOO&&& 
 HHOO""

r7   c                 *    U R                  US5        U $ )Nr   )r   )r  r   s     r4   attention_mask_func!SelfAttention.attention_mask_func  s    %%nh?r7   c                     UR                  5       S-
  nUR                  5       U   U-  n[        R                  " XUS9nU(       a  [	        S U 5       5      $ U$ )zSplit a tensor along its last dimension.
Arguments:
    tensor: input tensor.
    num_partitions: number of partitions to split the tensor
    contiguous_split_chunks: If True, make each chunk contiguous
                            in memory.
r!   r   c              3   @   #    U  H  oR                  5       v   M     g 7fr   )r   )rH   chunks     r4   rJ   <SelfAttention.split_tensor_along_last_dim.<locals>.<genexpr>  s     E))++s   )r   r   r.   re   tuple)r3   tensornum_partitionscontiguous_split_chunkslast_dimlast_dim_sizetensor_lists          r4   split_tensor_along_last_dim)SelfAttention.split_tensor_along_last_dim  sS     ::<!#h/>Akk&XF"EEEEr7   hidden_statesr   r  r  output_attentionsc                 >   U R                  U5      nUR                  5       SS U R                  SU R                  -  4-   n	UR                  " U	6 nU R                  US5      u  pnU R                  (       Ga   U
R                  SU
R                  S-
  S9u  pUR                  SUR                  S-
  S9u  nnU R                  XR                  5       S-   S9u  nnUSS2SSS24   R                  SS5      R                  5       USS2SSS24   R                  SS5      R                  5       nn[        XUUU5      u  p[        UUUUU5      u  nn[        R                  " X/UR                  S-
  S9n
[        R                  " UU/UR                  S-
  S9nOEUR                  SS5      nU R                  XR                  5       S-   S9u  nn[        XUUU5      u  p[!        U U
UUUU R"                  UUUS	9	u  nnnU R%                  U5      nUU4nU(       a  UU4-  nU$ )
Y
hidden_states: [seq_len, batch, hidden_size]
attention_mask: [(1, 1), seq_len, seq_len]
Nr   r   rU   r!   r   )r   r   )	r3   r   r   r   r   r  r  r  r  )r  r   r  r  r   r0  r  r'  r   r  maxrl   r   r   r.   concatr  r  r  )r3   r2  position_idsr   r  r  r  r3  mixed_raw_layernew_tensor_shaper   r   r   q1q2k1k2r   r   block_position_idsr  r
  r  outputr  s                            r4   r   SelfAttention.forward  sT     ..}= +//1#2622333:
 
 *..0@A 88!L		$$$ &&q{/?/?!/C&EFB__QY^^a-?_AFBr3C3C3E3IJHC/;Aq!G/D/N/NqRS/T/_/_/aQ1W%//15@@B -L/S,OFB/BS0BDFB,,xbggkCKb"XBGGaKAI'11!Q7L%5%5%7!%; ' =HC &@S,&@"K 3?##)&*&D&D!	3!/w M*7#**Gr7   )r  r   r  r  r  r  r  r  r  r  r  r   FNFF)r9   r:   r;   r<   r.   r   r   staticmethodr#  r0  r   r   r   boolr   r?   r   r   s   @r4   r  r  u  s     15#kk&*1
f     =B8 CG"'D||D 	D U5<<#=>?D D  D Dr7   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )GEGLUi  c                 L   > [         TU ]  5         [        R                  U l        g r   )r   r   r   r   activation_fn)r3   r   s    r4   r   GEGLU.__init__  s    VVr7   c                 f    UR                  SUR                  S-
  S9u  p#X R                  U5      -  $ )NrU   r!   r   )r'  r   rI  )r3   r   r   r   s       r4   r   GEGLU.forward  s2    !-&&r***r7   )rI  )r9   r:   r;   r<   r   r   r?   r   r   s   @r4   rG  rG    s    $+ +r7   rG  c                   P   ^  \ rS rSrSSS\\R                  4U 4S jjrS rSr	U =r
$ )GLUi  NTc                 b  > [         [        U ]  5         X0l        XPl        Xl        Uc  SU-  nX l        [        [        R                  R                  U R
                  U R                  UUS9U l        [        [        R                  R                  U R                  U R
                  UUS9U l        g )N   r  )r   rN  r   r  activation_funcr   r  r   r.   r
   r   dense_h_to_4hdense_4h_to_h)r3   r   r  r  rQ   rQ  r   r   s          r4   r   GLU.__init__  s     	c4!# . '$ !K!2&HHOO""
 'HHOO""
r7   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ )z.
hidden_states: [seq_len, batch, hidden_size]
)rR  rQ  rS  )r3   r2  intermediate_parallelr@  s       r4   r   GLU.forward:  s=     !% 2 2= A $ 4 45J K##$9:r7   )rQ  rS  rR  r   r  r  )r9   r:   r;   r<   r   r.   r   r   r   r?   r   r   s   @r4   rN  rN    s(     $(!%#kk
@ r7   rN  c                      ^  \ rS rSrSS\S\R                  SS4U 4S jjr   SS\R                  S\R                  S\	\
\R                  \R                  4      S	\S
\4
S jjrSrU =r$ )GLMBlockiI  NT   c           
         > [         [        U ]  5         X@l        U" XS9U l        Xl        [        UUUUUU	U R
                  S9U l        U" XS9U l        Xl	        [        UUUUU	S9U l        g )Neps)r  rQ   r   r  )r  rQ   r  r   )r   rY  r   r  input_layernormr  r  	attentionpost_attention_layernormr   rN  mlp)r3   r   r  layernorm_epsilonr  r  r  	layernormuse_biasr   r   r  r   s               r4   r   GLMBlock.__init__K  s     	h&( !  )L$8! '+I%!%!:!:< )2)0% % /%
r7   r2  r   r  r  r3  c           
         U R                  U5      nU R                  UUUUUUUS9n	U	S   n
U	SS nSU R                  -  S-  nX-  U
-   nU R                  U5      nU R	                  U5      nX-  U-   nU(       a  U4U-   nU$ U4USS -   nU$ )r5  )r   r  r  r  r3  r   r!   NrU   r   )r^  r_  r   r`  ra  )r3   r2  r8  r   r  r  r  r3  attention_inputattention_outputsattention_outputr  r   	mlp_input
mlp_outputr@  s                   r4   r   GLMBlock.forwardz  s    " ..}= !NN)!/ + 1 -Q/#AB' T__$s*'/2BB11-@	 XXi(
 "Z/j7*G  j712;.Gr7   )r_  r^  r  ra  r   r  r`  rC  )r9   r:   r;   r<   r   r.   r   r   r   r   r   rE  r   r?   r   r   s   @r4   rY  rY  I  s     $(04$#kk&*-
j CG"'2||2 	2 U5<<#=>?2 2  2 2r7   rY  c                      ^  \ rS rSrSrSrSr\rSr	S/r
U 4S jrS\R                  4S	 jrS
 rSS jrSS jr\U 4S j5       rSrU =r$ )ChatGLMPreTrainedModeli  zz
An abstract class to handle weights initialization and
a simple interface for downloading and loading pretrained models.
FTtransformerrY  c                 b   > [         TU ]  " UR                  40 UD6  [         [        U ]  U5        g r   )r   r   name_or_pathr   )r3   rs   kwargsr   s      r4   r   ChatGLMPreTrainedModel.__init__  s*    ,,77eT#F+r7   modulec                     g)zInitialize the weights.Nr8   )r3   rt  s     r4   _init_weights$ChatGLMPreTrainedModel._init_weights  s    r7   c                    UR                   u  p4U Vs/ s H5  oUR                  5       R                  U R                  R                  5      PM7     nn[
        R                  " X4U4US9nUR                  5         [        U5       H  u  pSXxS S 2S U	24'   M     UR                  S5        US:  R                  5       nU$ s  snf )Nr   r!   r   )rm   tolistindexrs   bos_token_idr.   onestril_	enumerate
unsqueeze_rE  )
r3   r)   r   
batch_size
seq_lengthseqcontext_lengthsr   icontext_lengths
             r4   	get_masks ChatGLMPreTrainedModel.get_masks  s    !*
DM
DMSJJLt{{778I 	 
 ZZ$H+13!*?!;A45Na.01 "<!!!$(3.446
s   <B=c                    UR                   u  pVU Vs/ s H5  owR                  5       R                  U R                  R                  5      PM7     nnU R
                  (       a  [        R                  " U[        R                  US9R                  S5      R                  US5      n	[        U5       H  u  pX*   XUS 24'   M     U Vs/ s Hd  n[        R                  " [        R                  " U[        R                  US9[        R                  " Xk-
  [        R                  US9S-   45      PMf     nn[        R                  " USS9n[        R                  " X4SS9n	U	$ [        R                  " U[        R                  US9R                  S5      R                  US5      n	U(       d  [        U5       H  u  pX*   XS & M     U	$ s  snf s  snf )Nr   r   r!   r   )rm   rz  r{  rs   r|  r  r.   r   longr   repeatr  r   r   stack)r3   r)   mask_positionsr   gmaskr  r  r  r  r8  r  r  r?  s                r4   get_position_ids'ChatGLMPreTrainedModel.get_position_ids  s   !*
DM
DMSJJLt{{778I 	 
 $$ <<%**(y|FF:q,A  &/%?!3A3D/0 &@ '6" '6N 		KK&#jj%' LL"3#jj%' *++, - '6  " "'-?Q!G ;;'I+,.L  !<<%**(y|FF:q,A  )2?)C%A4B4EL1 *D A
"s   <GA+Gc                 <    [        U[        5      (       a  X!l        g g r   )
isinstanceChatGLMModelgradient_checkpointing)r3   rt  values      r4   _set_gradient_checkpointing2ChatGLMPreTrainedModel._set_gradient_checkpointing  s    fl++,1) ,r7   c                    > UR                  SS5      nUR                  SS5        [        [        U ]  " SSU0UD6nX#l        U$ )zInstantiate the model.

Args:
    kwargs: Input args.
            model_dir: The model dir used to load the checkpoint and the label information.

Returns:
    The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
	model_dirNcfgpretrained_model_name_or_pathr8   )popr   r   from_pretrainedr  )clsrr  r  rr   r   s       r4   _instantiate#ChatGLMPreTrainedModel._instantiate  sN     JJ{D1	

5$eS1 ?*3?7=?#r7   r8   rB  )r9   r:   r;   r<   r   is_parallelizablesupports_gradient_checkpointingr"   config_classbase_model_prefix_no_split_modulesr   r
   Modulerv  r  r  r  classmethodr  r?   r   r   s   @r4   rn  rn    sb    
 &*# L%#,BII "H2  r7   rn  aM  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class.
    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general
    usage and behavior.

    Parameters:
        config ([`~ChatGLM6BConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the configuration.
            Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a:
  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`ChatGLM6BTokenizer`].
            See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0, 1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings.
            Selected in the range `[0, config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
            This is useful if you want more control over how to convert *input_ids* indices into associated vectors
            than the model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zdThe bare ChatGLM-6B Model transformer outputting raw hidden-states without any specific head on top.c                     ^  \ rS rSrSrS\4U 4S jjrS rS\R                  4S jr
\R                  4S jr\" \R                  S	5      5      \" \\\S
9         SS\\R*                     S\\R*                     S\\R                     S\\\\R                  \R                  4   S4      S\\R*                     S\\   S\\   S\\   S\\   S\\\R                  S4   \4   4S jj5       5       rSrU =r$ )r  iK  a  

The model can behave as an encoder (with only self-attention) as well
as a decoder, in which case a layer of cross-attention is added between
the self-attention layers, following the architecture described in [Attention is
all you need](https://arxiv.org/abs/1706.03762) by Ashish Vaswani,
Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

To behave as an decoder the model needs to be initialized with the
`is_decoder` argument of the configuration set to `True`.
To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder`
argument and `add_cross_attention` set to `True`; an
`encoder_hidden_states` is then expected as an input to the forward pass.
rs   c                   >^  [         TT ]  U5        UR                  T l        UR                  T l        [        R
                  T l        UR                  T l        UR                  T l        UR                  T l	        UR                  T l
        UR                  T l        T R                  T R                  -  T l        UR                  T l        UR                  T l        UR                  T l        [!        [        R"                  R$                  T R                  T R                  T R                  S9T l        ST l        U 4S jn[        R"                  R+                  [-        T R                  5       Vs/ s H
  o2" U5      PM     sn5      T l        [1        T R                  T R                  S9T l        T R                  b  T R5                  5        H
  nSUl        M     [        R8                  " T R                  5      R;                  5       T l        [?        U5      T l         [        R"                  RC                  S5      T l"        g g s  snf )N)num_embeddingsembedding_dimr   Fc                    > [        TR                  TR                  TR                  U TR                  TR
                  [        STR                  TR                  S9
$ )NT)r  r  rc  rd  r   r  )	rY  r   r  rb  r  r  r   r   r  )r  r3   s    r4   	get_layer(ChatGLMModel.__init__.<locals>.get_layerw  sX      ((&&"&"8"8/3..#!..%)%>%> r7   r\  g?)#r   r   max_sequence_lengthr   r.   r   r   r  
vocab_sizer   rb  r  r  r  r   r   r   r
   r   word_embeddingsr  
ModuleListrangelayersr   final_layernorm
parametersrequires_gradr   r  r   r   prefix_encoderDropoutdropout)r3   rs   r  r  paramr   s   `    r4   r   ChatGLMModel.__init___  s     $*#=#= !--!JJ#)#=#=  ++ ++!'!9!9!'!9!9.2.>.>$BZBZ.Z+$*$?$?!!--!'!9!9(HH??**##	 %
 ',#	 hh))16t1GH1GXYx 1GHJ  )$"8"8 : '*&+# +!&d.>.>!?!D!D!FD"/"7D 88++C0DL ( Is   3Ic                     U R                   $ r   r  r3   s    r4   get_input_embeddings!ChatGLMModel.get_input_embeddings  s    ###r7   new_embeddingsc                     Xl         g r   r  r3   r  s     r4   set_input_embeddings!ChatGLMModel.set_input_embeddings  s    -r7   c                    U R                   R                  S5      R                  US5      R                  U5      nU R	                  U5      R                  U5      nUR                  XR                  U R                  S-  U R                  U R                  U R                  -  5      nU R                  U5      nUR                  / SQ5      R                  S5      nU$ )Nr   r   rU   )rU   r!   r   r   rP  )r   r   expandr   r  r   r   r   r   r  r   r  r   re   )r3   r  r   r   r   r   s         r4   
get_promptChatGLMModel.get_prompt  s    **44Q7>>z?ACCE2f: 	--m<AA%H)..(($//A*=$$ 8 88:
 ,,7)11/BHHKr7   zbatch_size, sequence_length)
checkpointoutput_typer  r)   r8  r   r   .inputs_embedsr  r3  output_hidden_statesreturn_dictr+   c
                 n   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU	b  U	OU R                   R                  n	U R
                  (       a  U R                  (       a	  U(       a  SnUb  Ub  [        S5      eUb  UR                  S S u  pO!Ub  UR                  S S u  pnO[        S5      eUc  U R                  U5      nUGc	  U R                  b3  U R                  UR                  S   UR                  UR                  S9nO"[        S /[        U R                   5      -  5      nUc  U R#                  XR                  S9nUc  U R                   R$                  U R                   R&                  pX;   a  UOUnX;   a  SOSnU Vs/ s H"  nUR)                  5       R+                  U5      PM$     nnU R-                  UUUR                  US	9nU R                  bx  Ubu  [.        R0                  " U
S
UR3                  S5      U R                  5      R5                  UR                  5      nUS:  R7                  5       n[.        R8                  " UU4SS9nUR;                  SS
5      nU(       a  SOS nU(       a  SOS nU(       a  SOS nUc/  [.        R<                  " S
S
UR                  S9R7                  5       nOUR5                  UR                  5      n[?        U R                   5       H  u  nnU(       a  UU4-   nUU   nU R
                  (       aT  U R                  (       aC  [.        R@                  RB                  RC                  UUX#[.        RD                  " U5      UXg5      nO U" UUU[.        RD                  " U5      UUUS9nUS   nU(       a	  UUS
   4-   nU(       d  M  UUU(       a  SOS
   4-   nM     U RG                  U5      nU(       a  UU4-   nU	(       d  [        S UUUU4 5       5      $ [I        UUUUS9$ s  snf )NFzDYou cannot specify both input_ids and inputs_embeds at the same timerU   z5You have to specify either input_ids or inputs_embedsr   )r  r   r   ry  T)r  r   r  r!   r   r   r   r   r8   )r8  r   r  r  r  r3  c              3   0   #    U  H  nUc  M  Uv   M     g 7fr   r8   )rH   vs     r4   rJ   'ChatGLMModel.forward.<locals>.<genexpr>2  s         %q  %s   	)last_hidden_stater   r2  
attentions)%rs   r3  r  r  use_return_dictr  training
ValueErrorrm   r  r   r  r   r   r)  rj   r  r  mask_token_idgmask_token_idrz  r{  r  r.   r}  r   r   rE  r   rl   r   r  utilsr  r*  r  r   )r3   r)   r8  r   r   r  r  r3  r  r  r  r  _MASKgMASK
mask_token	use_gmaskr  r  prefix_attention_maskr2  presentsall_self_attentionsall_hidden_statesr  layerr  	layer_rets                               r4   r   ChatGLMModel.forward  sL   * 2C1N-TXT_T_TqTq$8$D KK,, 	 "+!6IDKK<Q<Q	%0%<k$++B]B]&&4== "	 ]%>V  "%.__Ra%8"J
&(5(;(;BQ(?%JAGI I   00;M"+"&//(q1$++'-- #2 #/
 #(T[[1A(A"B%!%&6&6 "0 "8 #"kk779S9Se&+&8Ud
$)$6DE	 ?H">GsCJJL&&z2i  "  $44#1$++#	  5  % 'N,F$)JJAy~~b1  %""$"^%:%:"; " &;S%@$F$F$H!"YY(=~'N+,.N &//15"2$5b4"6BD!"[[Ai6F6FGLLNN ,..y/?/?@N!$++.HAu#$58I$I!(+J**t}}!KK22===,LLOZO	 "!!-#1"\\!_)'&79	 &aLM#y|&66  &99a!4=8 '8#5 /< ,,]; 1]4E E  x):<O%       '+$+*	
 	
M"s   9)P2)r  r  r  r   r  r  rb  r  r  r  r   r   r  r   r  r   r   r  r  )	NNNNNNNNN)r9   r:   r;   r<   r   r"   r   r  r.   r   r  r   r  r   CHATGLM_6B_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCr   r=   r   rE  r	   r   r   r?   r   r   s   @r4   r  r  K  s~   
31} 31r$.5<< . 49::  +#**+HIK&=$ 1537150448$(,0/3&*H
E,,-H
 u//0H
 !.	H

 "%ellELL.H(I(+), #- .H
   0 01H
 D>H
 $D>H
 'tnH
 d^H
 
uU\\3&')@@	AH
KH
r7   r  )module_namec                   F  ^  \ rS rSrS\4U 4S jjrS rS r  S,S\S\	\
\4   S\S	\S
\	\
\4   4
S jjr    S-S\R                  S\\R"                     S\\R"                     S\\R"                     S\\R"                     S
\4S jjr          S.S\\R"                     S\\R"                     S\\R"                     S\\\R*                        S\\R"                     S\\R"                     S\\   S\\   S\\   S\\   4S jjr\S\\\R"                  \R"                  4   S4   S\R                  S
\\\R"                  \R"                  4   S4   4S j5       rS r\R4                  " 5              S/S\
S\\\
\
4      S\4S  jj5       r\R4                  " 5             S0S\
S\\\
\
4      S\4S! jj5       r\R4                  " 5           S-S"\\   S#\\    S$\\!   S%\\"\\R"                  /\\   4      4S& jj5       r#S1S'\4S( jjr$S)\	S
\	4S* jr%S+r&U =r'$ )2ChatGLMForConditionalGenerationi>  rs   c                   > [         TU ]  U5        UR                  U l        UR                  U l        [	        U5      U l        [        [        R                  UR                  UR                  S[        R                  S9U l        Xl        SU l        U R                  R                   (       a$  U R#                  U R                  R                   SS9  [$        R&                  " UR(                  5      U l        g )NFr  T)
empty_init)r   r   r  r  r  ro  r   r
   r   r   r  r.   r   lm_headrs   	quantizedquantization_bitquantizer#   r  rq  	tokenizerr   s     r4   r   (ChatGLMForConditionalGeneration.__init__A  s     
 $*#=#= $*$?$?!'/ II** ;;''MM$++664MH *99&:M:MNr7   c                     U R                   $ r   r  r  s    r4   get_output_embeddings5ChatGLMForConditionalGeneration.get_output_embeddings^  s    ||r7   c                     Xl         g r   r  r  s     r4   set_output_embeddings5ChatGLMForConditionalGeneration.set_output_embeddingsa  s    %r7   r  model_kwargsis_encoder_decoderstandardize_cache_formatr+   c           	         U R                  XS9US'   SU;   a  US   nUb  UR                  [        R                  :X  ar  [        R                  " UUR                  / UR                  S S QSP75      /SS9nUS S 2S S 2SS 24   R                  5       nSUS	'   [        R                  " XV/S
S9US'   SU;   aI  US   nUSSS 24   R                  5       nUS S 2SS S 24==   S-  ss'   [        R                  " Xx/SS9US'   U$ )N)r   r   r   r   r!   r   r   F).r   rU   r8  .)_extract_past_from_model_outputr   r.   rE  r   new_onesrm   clone)	r3   r  r  r  r   r   new_attention_maskr8  new_position_ids	            r4   #_update_model_kwargs_for_generationCChatGLMForConditionalGeneration._update_model_kwargs_for_generationd  s=    +/*N*N +O +H&' |+)*:;N)n.B.Bejj.P!&""++,Jn.B.B2A.F,J,JK, 01	"2
 &4Aq"#I%>%D%D%F".3"7+16#8a2A-. \)'7L*384::<OAq!G$)$+099/R,9L( r7   r)   pastr   r   r8  c           	         UR                   u  pxU R                  R                  U R                  R                  pX;   a  U
OU	nX;   a  SOSnUR	                  5       nU Vs/ s H  oR                  U5      PM     nnUc  UGbY  US S 2S4   R                  S5      nUb-  UR                  [        R                  :X  a  US S 2S S 2SS 24   nOS nUb
  USSS 24   nOU Vs/ s H'  oR                  U R                  R                  5      PM)     nnU R                  (       ac  [        R                  " [        UU5       VVs/ s H  u  nnUUU-
  /PM     snn[        R                  UR                  S9R                  S5      nON[        R                  " U Vs/ s H  nUPM     sn[        R                  UR                  S9R                  S5      nUc  UnUUUUS.$ Ub   UR                  [        R                  :w  a  S nUc  U R!                  XR                  S9nUc  U R#                  UUR                  UUS9nUUUUS.$ s  snf s  snf s  snnf s  snf )	NTFr   .r   )r)   r   r8  r   ry  )r   r  r  )rm   rs   r  r  rz  r{  r   r   r.   rE  r|  r  r*  rd   r  r   r  r  )r3   r)   r	  r   r   r8  rr  r  r  r  r  r  r  seqsr  r  
last_tokenr  mask_positionr  s                       r4   prepare_inputs_for_generation=ChatGLMForConditionalGeneration.prepare_inputs_for_generation  sv    "+
kk//1K1Ke#0Ud
!.DE	!;?@4C))J/4@ :"1b5)33B7J)n.B.Bejj.P!/1bc	!:!%'+CH5 DH#CGCIIdkk6674   # ,,#(<<>A+_?>??>:]N (n)DE?>? $jj(//$1
 2;2 ! $)<<<JKN=NK#jj(//$1 2;2 !
 |&'#' ,"0	  )n.B.Bejj.P!%%!%&6&6 "0 "8##44$++#1#	  5  % '#' ,"0	 e A#
? Ls    I.I>I
Ir  labelsr  r3  r  r  c                 ^   Ub  UOU R                   R                  nU
b  U
OU R                   R                  n
U R                  UUUUUUUU	U
S9	nUS   nU R	                  U5      R                  SSS5      R                  5       nS nUb  UR                  [        R                  5      nUSS S2S S 24   R                  5       nUSSS 24   R                  5       n[        SS9nUR                  UR                  5      nU" UR                  SUR                  S5      5      UR                  S5      5      nUR                  UR                  5      nUR                  UR                  5      nU
(       d  U4USS  -   nUb  U4U-   $ U$ [        UUUR                   UR"                  UR$                  S	9$ )
N)	r)   r8  r   r   r  r  r3  r  r  r   r!   rU   .r   i)ignore_index)losslogitsr   r2  r  )rs   r  r  ro  r  r   r   r   r.   float32r   r   r   r   r   r   r   r2  r  )r3   r)   r8  r   r   r  r  r  r3  r  r  transformer_outputsr2  	lm_logitsr  shift_logitsshift_labelsloss_fctr@  s                      r4   r   'ChatGLMForConditionalGeneration.forward  s    "+!6IDKK<Q<Q	%0%<k$++B]B]"..%)+'/!5# / 

 ,A.LL/771a@KKM	!U]]3I %S#2#q[1<<>L!#qr'?557L'T:H'??<+>+>?L!!"l&7&7&;<!!"%'D "]%8%89I77=../D]%8%<<F*.*:THv%FF%/??-;;*55
 	
r7   .beam_idxc                 .   ^ [        U4S jU  5       5      $ )a$  
This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
[`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
beam_idx at every generation step.

Output shares the same memory storage as `past`.
c           	   3      >#    U  Hg  nUS    R                  STR                  US    R                  5      5      US   R                  STR                  US   R                  5      5      4v   Mi     g7f)r   r!   N)index_selectr   r   )rH   r  r  s     r4   rJ   AChatGLMForConditionalGeneration._reorder_cache.<locals>.<genexpr>  sl      " !j qM&&q(++jm6J6J*KLqM&&q(++jm6J6J*KL
 !s   A/A2)r)  )r	  r  s    `r4   _reorder_cache.ChatGLMForConditionalGeneration._reorder_cache
  s!      " !" " 	"r7   c                    UR                  5       nUR                  SS5      nSS/SS/SS/S	S
/SS//nU HK  n[        R                  " SUS   -  SUS   -  U5      n[        R                  " SUS   -  SUS   -  U5      nMM     U$ )Nu   [[训练时间]]u   2023年,u   ，!u   ！:u   ：;u   ；z\?u   ？z([\u4e00-\u9fff])%sr   z\1%sr!   z%s([\u4e00-\u9fff])z%s\1)stripreplacerW   sub)r3   responsepunktsitems       r4   process_response0ChatGLMForConditionalGeneration.process_response  s    >>###$6	B%L%L%L%L5M
 Dvv4tAw>%Q/;Hvv4tAw>%Q/;H 
 r7   queryhistory
max_lengthc
                 *   Uc  / nU	c
  [        5       n	U	R                  [        5       5        UUUUUU	S.U
EnU(       d  UnOLSn[        U5       H  u  nu  pUSR	                  XU5      -  nM     USR	                  [        U5      U5      -  nU" U/SS9nUR                  U R                  5      nU R                  " S	0 UDUD6nUR                  5       S   [        US   S   5      S  nUR                  U5      nU R                  U5      nX2U4/-   nX4$ )
N)r2  	num_beams	do_sampletop_ptemperaturelogits_processor    [Round {}]
问：{}
答：{}
   [Round {}]
问：{}
答：ptreturn_tensorsr   r)   r8   )r   rc   r'   r  r  rj   r   r   generaterz  decoder.  )r3   r  r0  r1  r2  r4  r5  r6  r7  r8  rr  
gen_kwargspromptr  	old_queryr+  inputsr  s                     r4   _chat%ChatGLMForConditionalGeneration._chat,  sJ    ?G#24 ; =>$""& 0
 

 FF,5g,>((I<CC(, , -? 4;;CL%PPFF8D94;;'--7&7J7.."1%c&*=a*@&A&BC##G,((2X.//  r7   c	              +   F  #    Uc  / nUc
  [        5       nUR                  [        5       5        UUUUUS.U	En
U(       d  UnOLSn[        U5       H  u  nu  pUSR	                  XU5      -  nM     USR	                  [        U5      U5      -  nU" U/SS9nUR                  U R                  5      nU R                  " S	0 UDU
D6 HW  nUR                  5       S   [        US   S   5      S  nUR                  U5      nU R                  U5      nX2U4/-   nUU4v   MY     g 7f)
N)r2  r5  r6  r7  r8  r9  r:  r;  r<  r=  r   r)   r8   )r   rc   r'   r  r  rj   r   r   stream_generaterz  r@  r.  )r3   r  r0  r1  r2  r5  r6  r7  r8  rr  rA  rB  r  rC  r+  rD  r  new_historys                     r4   stream_chat+ChatGLMForConditionalGeneration.stream_chatW  sT     ?G#24 ; =>$"& 0
 

 FF,5g,>((I<CC(, , -? 4;;CL%PPFF8D94;;'++CfC
CGnn&q)#f[.A!.D*E*FGG ''0H,,X6H!X%6$77KK'' Ds   DD!generation_configr8  stopping_criteriaprefix_allowed_tokens_fnc           	   +     ^#    UR                   S   UR                   S   pUc  U R                  n[        R                  " U5      nUR                  " S0 UD6n	UR
                  UR                  p[        U
[        5      (       a  U
/n
UR                  S5      S L =(       a    UR                  S LnU(       a7  UR                  c*  [        R                  " SUR                   S3[        5        O]UR                  bP  UR                  U-   Ul
        U(       d5  [        R                  SUR                   SUR                   S3[        5        XR                  :  aH  U R                   R"                  (       a  S	OS
n[        R%                  SU SU SUR                   S35        Ub  UO	['        5       nUb  UO	[)        5       nU R+                  UUUUUS9nU R-                  UUS9nU R/                  U5      nUR1                  UR                   S   5      R3                  S5      nS n U R4                  " U40 U	D6nU " S0 UDSSSS.D6nUR6                  S S 2SS S 24   nU" UU5      nU" UU5      n[8        R:                  R=                  USS9nUR>                  (       a%  [@        RB                  " USS9RE                  S5      mO[@        RF                  " USS9m[@        RH                  " UTS S 2S 4   /SS9nU RK                  UU	U R                   R"                  S9n	URM                  [O        U4S jU
 5       5      RQ                  5       5      nURS                  5       S:X  d  U" X5      (       a  g Uv   GMP  7f)Nr   r   r2  zUsing `max_length`'s default (z) to control the generation length. This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we recommend using `max_new_tokens` to control the maximum length of the generation.zBoth `max_new_tokens` (=z) and `max_length`(=z) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)decoder_input_idsr)   zInput length of z is z, but `max_length` is set to zX. This can lead to unexpected behavior. You should consider increasing `max_new_tokens`.)rL  input_ids_seq_lengthencoder_input_idsrN  r8  )rL  rM  r!   TF)r  r3  r  r   )num_samples)r  c              3   .   >#    U  H
  nTU:g  v   M     g 7fr   r8   )rH   r  next_tokenss     r4   rJ   BChatGLMForConditionalGeneration.stream_generate.<locals>.<genexpr>  s     <|![A%|s   r8   )*rm   rL  copydeepcopyupdater|  eos_token_idr  rk   getr2  max_new_tokenswarningswarnUserWarningr   rs   r  warningr   r   _get_logits_processor_get_stopping_criteria_get_logits_warpernewfill_r  r  r
   
functionalr   r5  r.   multinomialr   argmaxr   r  mulsumr  r6  )r3   r)   rL  r8  rM  rN  rr  r  rQ  r  rZ  has_default_max_lengthinput_ids_stringlogits_warperunfinished_sequencesr*   model_inputsr  next_token_logitsnext_token_scoresprobsrU  s                        @r4   rH  /ChatGLMForConditionalGeneration.stream_generate  s     #,//!"4ioo7 $ $ 6 6 MM*;<(//9&9+88:K:X:X<lC(((>L!'"!"" "O&7&B&B$&N 	!&7&F&F&NMM01B1M1M0N Oe e 	 --9+<+K+KNb+b()./@/O/O.PPd(334 5ff    #?#??6:kk6T6T2ZeNN"#3"4D9M8N O%001 2001 0@/K+Qd R
1B1N-Th U
  55/!5'%=- 6 
 !77// 8 1 //0AB(}}Y__Q-?@FFqI==+)+L   "'%*	G !(q"ax 8 !1<M N -i9J K MM))*;)DE **#//q**1'!*  $ll5b9 		9k!T'.B"CLICC#';;#A#A D CL $8#;#;<|<<BBD$F  $'')Q.2C3' 3'OM s   M0M3bitsc                     US:X  a  g SSK Jn  U R                  (       a  [        R	                  S5        U $ SU l        XR
                  l        U" U R                  U4SU0UD6U l        U $ )Nr   r!   )r  zAlready quantized.Tr  )quantizationr  r  r   r_   rs   r  ro  )r3   rt  r  rr  r  s        r4   r  (ChatGLMForConditionalGeneration.quantize  si    19*>>KK,-K'+$#dE/9E=CEr7   inputc           
         US   nUS   nSU;   a  US   nOSnSU;   a  US   nOSnSU;   a  US   nOSnS	U;   a  US	   nOS
n[        U5      [        R                  :X  a  UR                  5       nU R	                  U R
                  UUUUUUS9u  p[        R                  S5        [        R                  U[        R                  U0$ )Ntextr1  r2     r7  ffffff?r4  r!   r5  T)r2  r7  r4  r5  zGeneration finished.)r   r.   r   rz  rE  r  r   r_   r   RESPONSEHISTORY)	r3   rx  rz  r1  r2  r7  r4  r5  r+  s	            r4   chat$ChatGLMForConditionalGeneration.chat  s    V}	"5 |,JJE!.KK%k*II%k*II=ELL(nn&G JJNN!# ' ! 	*+##Xz/A/A7KKr7   )rs   r  r  r  r  r  ro  )FF)NNNN)
NNNNNNNNNN)Nr{  r!   Tffffff?r|  N)Nr{  Tr  r|  NrB  )(r9   r:   r;   r<   r"   r   r  r  r   r   strr   rE  r  r.   r=   r   r   dictr  r   r>   r   rD  r!  r.  no_gradr   rk   rE  rJ  r   r   r   r   rH  r  r  r?   r   r   s   @r4   r  r  >  s   O} O:& $)).!! 38n! !	!
 #'! 
c3h!L ,06:5937D''D 5<<(D &ell3	D
 %U\\2D #5<<0D DP -1/315>B04)-$(,0/3&*;
ELL);
 u||,;
 !.	;

 "%(9(9":;;
  -;
 &;
 D>;
 $D>;
 'tn;
 d^;
z "E%,,45 "$)$4$4" 
uU\\5<</0#5	6" " " ]]_ 04 $#(!(! E#s(O,(! 	(! (!T ]]_ 6:&*" $%)&(&( "%S/2&( !$	&( &(P ]]_ 9=:><@BFl $$45l ##67	l
 $$89l #+8S%,,4G48I5> ,? #@l l\S $#L$ #L4 #L #Lr7   r  )NTF)^r   rW  r   r\   rW   sysr]  typingr   r   r   r   r   r   r	   r.   torch.nn.functionalr
   rf  r   torch.utils.checkpointtorch.nnr   r   torch.nn.utilsr   &transformers.generation.logits_processr   transformers.generation.utilsr   r   r   r   transformers.modeling_outputsr   r   r   transformers.modeling_utilsr   transformers.utilsr   r   r   modelscope.metainfor   modelscope.modelsr   r   r   modelscope.outputsr   modelscope.utilsr   loggingmodelscope.utils.constantr    configurationr"   tokenizationr#   platform_C_jit_set_profiling_mode_jit_set_profiling_executor_jit_override_can_fuse_on_cpu_jit_override_can_fuse_on_gpu
get_loggerr  r  (CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LISTr'   r   r  r   jitscriptr   r   r   r   r   r  r  rG  rN  rY  rn  CHATGLM_6B_START_DOCSTRINGr  r  register_moduler  
chatglm_6br  r8   r7   r4   <module>r     sk      	 	 
  D D D      0 $ BA A  8G G ' 7 7 ) . + ( * <<8	HH$$U+	HH((/	HH**40	HH**40				( # , (/ GTEHHOO @ 4 43"ehhoo 3"l  "  slTEHHOO Tn	+EHHOO 	+.%((// .bcuxx cL\Z \~	 / d jl
) l
	l
^ 

0A0ABeL&< eL CeLr7   