
     Ti~A                    V   S SK Jr  S SKJrJr  S SKrS SKrS SK	J
s  Jr  S SKJrJrJr   \\\R"                  4   rSS jrS r " S S\R*                  5      r\" 5       r " S	 S
\R0                  5      r\R5                  5       r\R8                  " \/5      r\R<                  " \5      rg)    )annotations)SequenceUnionN)_basics	_ir_utilspatternc                ^    XR                   R                  ;   =(       a    U R                  U:H  $ N)graphinputsname)valuer   models      b/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/onnxscript/rewriter/ort_fusions/gqa.py_is_model_inputr   #   s#    KK&&&=5::+==    c                ,   U R                  USSS9nU R                  U5      nU R                  USSS9n	U R                  U	5      n
U R                  X5      nU R                  US/5      nU R	                  XS5      nU R                  X|SS9nU R                  XN5      nU R                  USSS9n[        R                  " UU/5      nU R	                  SUS5      nU R                  USS/5      nU R                  UU5      nU R                  UU5      nU R                  UU5      nU R                  UU5      n[        R                  " UU/5      nU R                  UUS	9nU R                  UU5      nU R!                  USS/5      nU R                  UU5      nU$ )
zODefines a pattern for a pure causal mask, with optional sliding window support.      )endstart   r   axis)upperto)ShapeSqueezeAddReshapeRangeConcatExpandTrilur   OrValueGreaterSubLessOrEqualOrCastMul	Unsqueeze)op	input_idspast_kv_cache
shape_B111min_valwindow_sizedtypeseq_len
seq_len_0Dpast_seq_lenpast_seq_len_0Dtotal_seq_len_0Dtotal_seq_lencurrent_range
mask_shapemask_all_min_expandmask_all_min_trilumask_all_mintotal_range_as_rowcurrent_range_as_column
non_causalcurrent_range_minus_windowout_of_sliding_windownon_causal_sliding_windowboolean_maskfloat_0_1_maskfloat_0_min_maskmask_4d_11STmask_4d_B1STs                                r   _causal_maskrL   '   s    hhyaqh1GG$J88Mq8:Ljj.Ovvo:JJ/"6MHH_BM7:J))G8"5qB??$79K#LML!%5q9 jjQ@.0GHJ "$(?!MNN+=?YZ "j2G H??J0I#JKLWW\eW4NvvlN;<< 01a&9L99\:6Lr   c                  $    \ rS rSrS rSS jrSrg)_CausalMaskPatternT   c
           	     V   [        UUUUUUU5      n
UR                  USS/5      nUR                  XS9nUR                  X5      nUR                  U[        R
                  R                  S9n[        R                  " X/5      nUR                  US5      nUR                  XU
5      nU$ )Nr   r   r   g        )rL   r.   r,   r!   irDataTypeFLOATr   r'   EqualWhere)selfr/   r0   r1   r2   r3   r4   dtype1attn_mask_2ddtype2causal_maskattn_mask_4dattn_mask_4d_castsumsum_fp32is_zeroresults                    r   r   _CausalMaskPattern.patternU   s     #
 ||L1a&9GGLG<ff[47732;;#4#475??H?3((8S)'K8r   Nc                   [        USUR                  5      (       d%  [        R                  " 5       R	                  SU5      $ UR                  5       UR                  5       :w  a&  [        R                  " 5       R	                  SX#/5      $ [        R                  " U5      nUc%  [        R                  " 5       R	                  SU5      $ [        R                  " UR                  R                  5       5      R                  n	X:w  a+  [        R                  " 5       R	                  SU	 SU 3U5      $ U(       a%  [        R                  " 5       R	                  SU5      $ g)	Nattention_maskzInvalid attention_mask inputzDtype mismatchzMinval is not a constant.zExpected min value z, got z Sliding window not yet supportedT)r   r   r   MatchResultfailas_intr   get_singleton_valuenpfinfor5   numpymin)
rV   contextrW   rY   r3   rX   sliding_window_	min_valueexpected_min_values
             r   check_CausalMaskPattern.checkv   s   |-=w}}MM&&(--.Ll[[==?fmmo-&&(--.>@PQQ 11':	&&(--.I7SSXXgmm&9&9&;<@@*&&(--%&8%9	{KW 
 &&(--2N  r    r
   )__name__
__module____qualname____firstlineno__r   rq   __static_attributes__rs   r   r   rN   rN   T   s    Br   rN   c                  X   ^  \ rS rSrU 4S jrS r    S SS jjr    SS jrSrU =r	$ )	GroupQueryAttention   c                "   > [         TU ]  SSS9  g )NGQAF)remove_nodes)super__init__)rV   	__class__s    r   r   GroupQueryAttention.__init__   s    U3r   c           	        UR                  U[        R                  S/S9nUR                  U[        R                  SS/S9n[        R                  " X/5      nUR                  U/ SQS9nUR                  U[        R                  SS/S9n[        R                  " X/5      nUR                  U[        R                  S	/S9nUR                  U[        R                  SS
/S9n[        R                  " UU/5      nUR                  U/ SQS9nUR                  U[        R                  SS/S9n[        R                  " UU/5      nUR                  U[        R                  S/S9nUR                  U/ SQS9nUR                  UUUU	SS/S9nUR                  UUUU	SS/S9nUR                  UUSS9n[        R                  " UU/5      nUR                  US/5      nUR                  U[        R                  5      nUR                  U[        R                  S/S9nUR                  UUSS9n[        R                  " UU/5      nUR                  US/5      nUR                  U[        R                  5      nUR                  U[        R                  S/S9nUR                  UUUU
SSS9nUR                  U/ SQS9n UR                  U [        R                  S/S9n!U!UU4$ )Nquery_BSHDh)_outputsr   query_BSHDh_normalized)r   r   )r   r   r   r   )permquery_BHSDh_normalizedkey_BSHkvDhkey_BSHkvDh_normalizedkey_BHkvSDh_normalizedvalue_BSHkvDhcom.microsoftquery_BHSDh_rope)_domainr   key_BHkvSDh_roper   r   key_seq_BHTDhvalue_seq_BHTDhBHSdzai.onnxruntime._fusion)
key_formatr   attention_BSD)r"   r   	ANY_VALUESimplifiedLayerNormalizationr'   	TransposeRotaryEmbeddingr$   r.   r%   SDPA)"rV   r/   	query_BSD	key_BSDkvvalue_BSDkvpast_key
past_valueposition_idscossinmaskr   r   query_BHSDhr   r   r   key_BHkvSDhr   r   value_BHkvSDhr   r   key_seq_BHkvTDhkey_seq_BHkv1TDhkey_seq_BHkvGTDhr   value_seq_BHkvTDhvalue_seq_BHkv1TDhvalue_seq_BHkvGTDhr   attention_BHSDhattention_BSHDhr   s"                                     r   r   GroupQueryAttention.pattern   sR    jjG,=,=jX "$!@!@**?W>X "A "
 oo{&KL ll;\lB "$!@!@**?W>X "A "
 oo{&KL jjG,=,=jX!#!@!@**?W>X "A "
 oo{4J&KL ll;\lB "$!@!@**?W>X "A "
 oo{4J&KL 

;0A0A_L]
^]F--#() . 
 --#() . 
 ))H.>R)H!//?<L*MN<<!=99%5w7H7HI

g//?:K # 
 IIj-bII#OO->,NO\\*;aSAYY'97;L;LM** 1 1=N<O % 
 '', " 
 ,,\,J

W../9J # 
 o/@@@r   c                  ^ [         R                  " 5       nUb  Ub  UR                  SX/5      $ Ub  Ub  UR                  SX/5      $ 0 mSU4S jjnU" U/ SQ5      (       a  gU" U/ SQ5      (       a  gU" U/ SQ5      (       a  gUb  U" U/ SQ5      (       a  gUb  U" U/ SQ5      (       a  g[        R                  " U	S	5      n[        R                  " U
S	5      n[        U[        5      (       d  UR                  S
U	5      $ [        U[        5      (       d  UR                  SU
5      $ UU l        UU l        UR                  5       R                  nUR                  5       R                  nUR                  SS5      nUR                  SS5      nUU:w  aC  [         R                  " 5       R                  SUR                  5       UR                  5       /5      $ UU l        UR                  5       nUc%  [         R                  " 5       R                  SU5      $ [        R                  UR                  UR                   USS9nUc%  [         R                  " 5       R                  SU5      $ g)NzQuery normalized twicezKey normalized twicec                <   > [         R                  " TX5      (       + $ r
   )_fusion_utilscheck_shape_bool)valdimsbindingss     r   no_match+GroupQueryAttention.check.<locals>.no_match$  s    $55hJJJr   )BSDF)r   r   Dkv)r   HkvPDh)r   r   r   Dvr   z#Unable to determine num_heads valuez&Unable to determine kv_num_heads valueinterleavedr   z/Rotary embedding interleaved attribute mismatchzUnhandled mask pattern)check_nodes_are_removablez'Mask does not match causal mask patternT)r   ir.Valuer   zSequence[str]returnbool)r   rd   re   r   get_dim
isinstanceint	num_headskv_num_headsproducer
attributesget_int_interleaved_causal_mask_patternmatchr   graph_or_function)rV   rl   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rn   r`   r   r   r   query_rotary_attributeskey_rotary_attributesquery_interleavedkey_interleaved	mask_nodemask_match_resultr   s                              @r   rq   GroupQueryAttention.check  se   & $$&!-2H2T;;('@  "-2H2T;;&'@  $&	K I//I011K!233HX7N$O$O!hz;R&S&S %%k15	 ((a8)S));;DkRR,,,;;GUU"( #3";";"="H"H 0 9 9 ; F F3;;M1M/77qI/&&(--A!**,.>.G.G.IJ  . MMO	&&(--.FMM066MM%%&+	 7 
 $&&(--.WY]^^ r   c                P   UR                  [        R                  " S[        R                  R                  S9S9nUR                  [        R                  " S/[        R                  R
                  S9S9nUR                  [        R                  " S/[        R                  R
                  S9S9nUR                  UUSS9nUR                  U[        R                  R                  S9nUR                  UUSS9nUR                  UU5      nU=(       d    UnUbb  UR                  5       nUR                  nUR                  S   nUR                  " UU40 UD6nUR                  / SQS9nUR                  UU5      nU=(       d    UnUbb  UR                  5       nUR                  nUR                  S   nUR                  " UU40 UD6nUR                  / SQS9nUR                  UU5      nUR                  UUUUUUUUU	U R                  U R                   SU R"                  S	S
S9$ )Nr   )r5   )r   r   )keepdimsr   )r   r   r   )
value_intsr   r   )r   r   	do_rotaryrotary_interleavedr   r   )ConstantrQ   tensorrR   INT32INT64	ReduceMaxr,   r!   r   r   r   r   r"   rz   r   r   r   ) rV   r/   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rn   one_int32_0done_int64_1dzero_int64_1dseqlens_k_int64	seqlens_kmax_seq_lengthtotal_seq_length_int32normalized_query	norm_node
norm_attrs
norm_scalereshape_BSHDh_to_BSDnormalized_keyreshape_BSHkvDh_to_BSDkvs                                    r   rewriteGroupQueryAttention.rewrite]  s   < {{1BKK<M<M)N{O{{A3bkk>O>O)P{Q"))QCr{{?P?P*QR,,|\A,NGGO0A0AGB	iK!#!E1K5K'(113I"--J"))!,J%'%D%DZ&+5&" $&;;*;#E 

#9;OPI/I3I%&//1I"--J"))!,J%'%D%DZ&+5&" (*{{j{'I$

#9;STI%%"nn**#00#! & 
 	
r   )r   r   r   )NNNN)rl   z_basics.MatchContext)
rt   ru   rv   rw   r   r   rq   r   rx   __classcell__)r   s   @r   rz   rz      sH    4kAt  $###!W%WN  $####O
 O
r   rz   )r   r   r   strr   zir.Modelr   r   ) 
__future__r   typingr   r   rj   rh   onnx_irrQ   !onnxscript.rewriter._fusion_utilsrewriterr   onnxscript.rewriterr   r   r   r   SymbolicDimDimr   rL   PatternBaserN   r   RewriteRuleClassBaserz   rule_basic_gqa_ruleRewriteRuleSet	gqa_rulesapply_fusion_rulesfuse_gqars   r   r   <module>r     s    # "   9 9 ; ;& C >*Z9,, 9x *+ Y
'66 Y
x &**,""O#45	++I6r   