
    9iB                     \    S SK r S SKJr  S SKJr  \R
                  " 5       r " S S\5      rg)    N)PretrainedConfig)loggingc            $          ^  \ rS rSrSrSSSSSSSS	S	S
SSSSSSSSSSSS	SSS	SSSSSS/SSSSS4$U 4S jjr\S 5       rSrU =r	$ )GPTMoEConfig   zgpt-moei d  i   N   i   gelug?i      g-q=TFg{Gz?   d   r   g?standardc%                   > [         T(U ]  " SSU0U%D6  Xl        X l        Uc  SU-  OUU l        X@l        XPl        Xpl        X`l        Xl	        Xl
        Xl        Xl        Xl        Xl        Xl        Xl        UU l        UU l        U(       a	  U(       a   eUU l        UU l        Uc  X%-  S:X  d   eX%-  U l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l        UU l         U U l!        U!U l"        U"U l#        U#U l$        U$U l%        U R@                  S   [L        RN                  RQ                  5       :  a$  [L        RN                  RQ                  5       U l)        OU R@                  S   U l)        [U        [L        RV                  RY                  S5      S   5      n&[U        [L        RV                  RY                  S5      S   5      n'U&S:  =(       d    U&S:H  =(       a    U'S:  U l-        g )Nlayer_norm_eps   r   .       ).super__init__
vocab_sizehidden_sizeffn_hidden_sizenum_hidden_layersnum_attention_heads
hidden_actintermediate_sizehidden_dropout_probattention_probs_dropout_probmax_position_embeddingstype_vocab_sizelayernorm_epsilonbias_gelu_fusionfp32_residual_connectionsequence_parallelfp16bf16apply_query_key_layer_scalingattention_softmax_in_fp32kv_channelsmasked_softmax_fusionattention_dropoutbias_dropout_fusion(apply_residual_connection_post_layernormhidden_dropoutinit_method_stdeod_idtokens_to_generatetop_ktop_pnum_experts	use_tuteltop_k_linear_strategyuse_expert_residual_networkload_ds_ckpts	model_dirtorchcudadevice_countmoe_expert_parallel_sizeint__version__splitno_persist_layer_norm))selfr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   kwargsTORCH_MAJORTORCH_MINOR	__class__s)                                           k/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/gpt_moe/configuration.pyr   GPTMoEConfig.__init__   s   P 	D(9DVD$&&  !;,; 	!2#6 $!2#6 ,H)'>$.!2 0(@%!2		T""-J*)B&4999*AD%:"!2#6 4 	5,."4

&"%:"+F(*"A!8!8!::,1JJ,C,C,ED),0,<,<Q,?D)%++11#6q9:%++11#6q9:!OFq 0 E[25E 	"    c                     U R                   (       a  [        R                  $ U R                  (       a  [        R                  $ [        R
                  $ )N)r&   r;   halfr'   bfloat16float)rC   s    rH   params_dtypeGPTMoEConfig.params_dtypey   s.    99::YY>>!;;rJ   )&r(   r.   r,   r   r)   r'   r-   r#   r1   r   r&   r$   r   r/   r   r   r0   r   r*   r"   r9   r+   r    r:   r>   rB   r   r5   r   r%   r2   r3   r7   r4   r!   r8   r6   r   )
__name__
__module____qualname____firstlineno__
model_typer   propertyrO   __static_attributes____classcell__)rG   s   @rH   r   r      s    J    "" #),$(#!%*#*.&+"&! $5: "",(-M\G|  rJ   r   )r;    transformers.configuration_utilsr   transformers.utilsr   
get_loggerloggerr   r   rJ   rH   <module>r]      s-      = &				i# irJ   