
    i(                         S SK r S SKrS SKJs  Jr  S SKJr   " S S\R                  5      r\R                  " 5        " S S\R                  5      5       r	g)    N)nnc                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )EncoderLayer   c                   > [         TU ]  5         Xl        U R                  U l        U R                  U l        [
        R                  " U R                  U5      U l        [
        R                  " U R                  U5      U l        [
        R                  " X5      U l	        [
        R                  " X5      U l
        X l        [
        R                  " U R                  5      U l        [
        R                  " U R                  5      U l        SU-  n[
        R                  " X5      U l        [
        R                  " X15      U l        g )N   )super__init__	embed_dimkdimvdimr   Lineark_projv_projq_projout_proj	num_heads	LayerNormself_attn_layer_normfinal_layer_normfc1fc2)selfr   r   ffn_embed_dim	__class__s       `/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/executorch/exir/tests/transformer.pyr
   EncoderLayer.__init__   s    "NN	NN	ii		95ii		95ii	5		)7"$&LL$@! "T^^ < 	M 	 99Y699]6    c                    UnU=n=pE[         R                  " UUUU R                  U R                  U R                  R
                  U R                  R
                  U R                  R
                  U R                  R
                  U R                  R                  SSSS S S S S9u  pX!-   nU R                  U5      nUn[         R                  " U R                  U5      5      nU R                  U5      nX!-   nU R                  U5      nU$ )NFg        T)q_proj_weightk_proj_weightv_proj_weightout_proj_weightout_proj_biasadd_zero_attn	dropout_puse_separate_proj_weightin_proj_weightin_proj_biasbias_kbias_v)Fmulti_head_attention_forwardr   r   r   weightr   r   r   biasr   relur   r   r   )r   xresidualquerykeyvalue_s          r   forwardEncoderLayer.forward$   s    --NNNN++,,++,,++,, MM00--,,%)%
( L%%a(FF488A;HHQKL!!!$r   )r   r   r   r   r   r   r   r   r   r   r   r   )r   )__name__
__module____qualname____firstlineno__r
   r7   __static_attributes____classcell__r   s   @r   r   r      s    7* r   r   c                   <   ^  \ rS rSrSrSU 4S jjrS rS rSrU =r	$ )TransformerF   av  
A simplified implementation of mt_model that does not have all those heavy
dependencies but still be similar enough to the original model.

Suitable to be put in exir end2end tests. E.g., we can use it to ease the
testing of memory planning for dynamic shapes on REAL models.

Some of the simplifications recorded here:
1. the original model will reset the embedding to a 0 vector for padding token.
   We skip that.
2. skip various configurations in the original model. E.g., original model
   has a config cfg.no_scale_embedding to control if the token embedding
   should be scaled or not. We just always scale the embedding.
c                 T  > [         TU ]  5         Xl        SU l        [        R
                  " U R                  U R                  5      U l        [        R                  " U R                  5      U l	        [        U5       Vs/ s H  n[        U R                  S9PM     snU l        g s  snf )N    )r   )r	   r
   inp_vocab_size	model_dimr   	Embeddingtoken_embed_tablemathsqrtembed_scaleranger   encoder_layers)r   rE   rF   num_encoder_layersr6   r   s        r   r
   Transformer.__init__W   s~    ,!#d.A.A4>>!R99T^^4<ABT<U
<UqL4>>2<U
 
s   ?B%c                     U R                  U5      nUR                  SS5      nU R                   H  nU" U5      nM     U$ )Nr      )rH   	transposerM   )r   
src_tokensembedr1   layers        r   encodeTransformer.encodea   sD    &&z2 OOAq!((EaA ) r   c                 t    US:X  a$  Sn[         R                  " SU R                  SU4S9$ [        SU S35      e)NrV   
   r   rQ   )lowhighsizezmethod z is not supported yet)torchrandintrE   AssertionError)r   methodseqlens      r   get_random_inputsTransformer.get_random_inputsn   sN    XF==((  !76(2G!HIIr   )rK   rM   rE   rF   rH   )rY   rD   r   )
r9   r:   r;   r<   __doc__r
   rV   rb   r=   r>   r?   s   @r   rA   rA   F   s    
J Jr   rA   )
rI   r]   torch.nn.functionalr   
functionalr,   Moduler   no_gradrA    r   r   <module>rj      sO        5299 5p 3J")) 3J 3Jr   