
    3KiP                     ~   % S SK r S SKJrJrJr  SSKJr  S SKJrJ	r	  S SK
Jr  S SK
Jr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJr  S SKrSS/r\	" S5      r\" S5      r\ R6                  R8                  rS r0 r\\\4   \ S'   S r!S;S\\\\4   /\\\4   4   4S jjr"\"" \RF                  5      SS.S\$4S jj5       r%\"" \RL                  5      S<S\$4S jj5       r'\"" \RP                  5      S<S\$4S jj5       r)\"" \RT                  5      S<S\$4S jj5       r+\"" \RX                  5           S=S\$4S jj5       r- S;S\.\$   S\.\$   S\.\$   S\/S\$4
S  jjr0\"" \Rb                  \Rd                  \Rf                  \Rh                  \Rj                  /5      SS.S\$4S! jj5       r6\"" \Rn                  5      S\$4S" j5       r8S# r9\"" \Rt                  \Rv                  \Rx                  /5      SS.S\$4S$ jj5       r=S% r>SS&.S\\?\?\$S'4   \?\$S'4   \?\$S'4   \?\$S'4   S-  4      4S( jjr@SS&.S\\?\?\$S'4   \?\$S'4   \?\$S'4   \?\$S'4   S-  4      4S) jjrA\"" \R                  S*S+9SS.S\$4S, jj5       rC\"" \R                  S*S+9S\$4S- j5       rES. rF\"" \R                  \R                  \R                  /5      SS.S\$4S/ jj5       rJ\"" \R                  S*S+9S\$4S0 j5       rL\"" \R                  S*S+9S\$4S1 j5       rN0 \RF                  \%_\RL                  \'_\RP                  \)_\RT                  \+_\RX                  \-_\Rb                  \6_\Rd                  \6_\Rf                  \6_\Rj                  \6_\Rh                  \6_\Rn                  \8_\Rt                  \=_\Rv                  \=_\Rx                  \=_\R                  \J_\R                  \J_\R                  \J_\R                  \C\R                  \E\R                  \L\R                  \N0ErS2 rO/ S3QrPS4 rQS5 rRS\S4S6 jrTS7 rU " S8 S5      rV " S9 S:\5      rWg)>    N)tree_maptree_flattentree_unflatten   )ModuleTracker)AnyTypeVar)Callable)Iterator)	ParamSpec)defaultdict)TorchDispatchModeprodwrapsFlopCounterModeregister_flop_formula_T_Pc                 \    [        U [        R                  5      (       a  U R                  $ U $ N)
isinstancetorchTensorshape)is    Z/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/torch/utils/flop_counter.py	get_shaper      s!    !U\\""wwH    flop_registryc                 8   ^  [        T 5      S S.U 4S jj5       nU$ )N)out_valc                 B   > [        [        XU 45      u  pnT" USU0UD6$ )N	out_shape)r   r   )r#   argskwargsr%   fs       r   nfshape_wrapper.<locals>.nf   s.    "*9tW6M"Ni$6)6v66r    r   r(   r)   s   ` r   shape_wrapperr,      s#    
1X 7 7 Ir    returnc                 h   ^ ^ S[         [        [        4   S[         [        [        4   4UU 4S jjnU$ )Nflop_formular-   c                    >^  T(       d  [        T 5      m SU 4S jjn[        R                  R                  R	                  UT5        T $ )Nc                    > [        U [        R                  R                  5      (       d  [	        SU  S[        U 5       35      eU [        ;   a  [        SU  35      eT[        U '   g )Nzlregister_flop_formula(targets): expected each target to be OpOverloadPacket (i.e. torch.ops.mylib.foo), got z which is of type zduplicate registrations for )r   r   _opsOpOverloadPacket
ValueErrortyper!   RuntimeError)targetr/   s    r   register=register_flop_formula.<locals>.register_fun.<locals>.register)   sl    fejj&A&ABB Hh0f@A A &"%A&#JKK$0M&!r    )r-   N)r,   r   utils_pytree	tree_map_)r/   r8   get_rawtargetss   ` r   register_fun+register_flop_formula.<locals>.register_fun%   s7    (6L	1 	%%h8r    )r
   r   r   )r>   r=   r?   s   `` r   r   r   $   s5    8BF#3 R8H  & r    )r%   c                R    U u  pVUu  pxXg:w  a  [        SU SU 35      eXX-  S-  U-  $ )zCount flops for matmul.z3matmul: inner dimensions must match (k == k2), got  and    AssertionError)	a_shapeb_shaper%   r&   r'   mkk2ns	            r   mm_floprL   :   sE    
 DAEBwRSTRUUZ[]Z^_``519q=r    c                     [        X5      $ )zCount flops for addmm.rL   
self_shaperF   rG   r%   r'   s        r   
addmm_floprQ   F   s     7$$r    c                     U u  pEnUu  pxn	XG:w  a  [        SU SU 35      eXh:w  a  [        SU SU 35      eXE-  U	-  S-  U-  n
U
$ )z"Count flops for the bmm operation.z0bmm: batch dimensions must match (b == b2), got rB   z0bmm: inner dimensions must match (k == k2), got rC   rD   )rF   rG   r%   r'   brH   rI   b2rJ   rK   flops              r   bmm_floprV   K   ss    
 GA!IBAwOPQsRWXZW[\]]wOPQsRWXZW[\]]519q=1DKr    c                     [        X5      $ )z&Count flops for the baddbmm operation.)rV   rO   s        r   baddbmm_floprX   Z   s    
 G%%r    c	                     [        X5      $ )zCount flops for _scaled_mm.rN   )
rF   rG   scale_a_shapescale_b_shape
bias_shapescale_result_shape	out_dtypeuse_fast_accumr%   r'   s
             r   _scaled_mm_flopr`   a   s     7$$r    x_shapew_shaper%   
transposedc                 |    U S   nU(       a  U OUSS nUtpgn [        U5      [        U5      -  U-  U-  U-  S-  n	U	$ )a  Count flops for convolution.

Note only multiplication is
counted. Computation for bias are ignored.
Flops for a transposed convolution are calculated as
flops = (x_shape[2:] * prod(w_shape) * batch_size).
Args:
    x_shape (list(int)): The input shape before convolution.
    w_shape (list(int)): The filter shape.
    out_shape (list(int)): The output shape after convolution.
    transposed (bool): is the convolution transposed
Returns:
    int: the number of flops
r   rC   Nr   )
ra   rb   r%   rc   
batch_size
conv_shapec_outc_infilter_sizerU   s
             r   conv_flop_countrj   r   s[    ( J''Y;J 'E+ 
d;//*<uDtKaODKr    c                    [        XXvS9$ )zCount flops for convolution.rc   )rj   )
ra   rb   _bias_stride_padding	_dilationrc   r%   r&   r'   s
             r   	conv_floprq      s     7YNNr    c                 0   S nSn U
S   (       a"  [        US   5      nU[        XX(       + 5      -  nU
S   (       aY  [        US   5      nU(       a#  U[        U" U 5      U" U5      U" U5      SS9-  nU$ U[        U" U5      U" U 5      U" U5      SS9-  nU$ )Nc                 4    U S   U S   /[        U SS  5      -   $ )Nr   r   rC   )list)r   s    r   tconv_backward_flop.<locals>.t   s$    a%(#d59o55r    r   r   Frl   )r   rj   )grad_out_shapera   rb   rm   rn   ro   rp   rc   _output_padding_groupsoutput_maskr%   ru   
flop_countgrad_input_shapegrad_weight_shapes                   r   conv_backward_flopr~      s    6JDL 1~$Yq\2on?OQ_``
1~%il3/!N*;QwZK\I]joppJ
  /!G*a6GK\I]joppJr    c                     U u  p4pVUu  pxpUu  ppX7s=:X  a  U:X  a!  O  OXHs=:X  a  U:X  a  O  OXj:X  a
  X:X  a  Xj:X  d  [        S5      eSnU[        X4-  XV4X4-  Xi45      -  nU[        X4-  XY4X4-  X45      -  nU$ )zR
Count flops for self-attention.

NB: We can assume that value_shape == key_shape
z8sdpa_flop_count: query/key/value shapes are incompatibler   rE   rV   )query_shape	key_shapevalue_shaperS   hs_qd_q_b2_h2s_k_d2_b3_h3_s3d_vtotal_flopss                   r   sdpa_flop_countr     s     !NA#"Cc$Cc?s?!/c/3:]`]gWXXK8QUC-s/@AAK8QUC-s/@AAKr    c                    [        XU5      $ )Count flops for self-attention.r   )r   r   r   r%   r&   r'   s         r   	sdpa_flopr     s     ;;??r    c                     SSK Jn  SSKJn  [	        XU45      (       d8  U R
                  R                  S:w  a  U R                  5       R                  5       $ U/U R                  S5      S-
  -  $ )z
If the offsets tensor is fake, then we don't know the actual lengths.
In that case, we can just assume the worst case; each batch has max length.
r   )
FakeTensor)FunctionalTensormetar   )
torch._subclasses.fake_tensorr   #torch._subclasses.functional_tensorr   r   devicer5   difftolistsize)offsetsmax_lenr   r   s       r   _offsets_to_lengthsr   '  s\    
 9Dg,<=>>7>>CVCVZ`C`||~$$&&9Q!+,,r    )grad_out.c              #     #    UGb+  [        UR                  5      S:w  a  [        S5      e[        UR                  5      S:w  a  [        S5      eUb%  UR                  U R                  :w  a  [        S5      eU R                  u  pn
UR                  u  pnUR                  u  pnUc  [        S5      eUc  [        S5      eUR                  UR                  :w  a  [        S5      e[        XF5      n[        XW5      n[	        UUS	S
9 H'  u  nnSU	UU
4nSUUU4nSUUU4nUb  UOSnUUUU4v   M)     gU R                  UR                  UR                  Ub  UR                  OS4v   g7f)a'  
Given inputs to a flash_attention_(forward|backward) kernel, this will handle behavior for
NestedTensor inputs by effectively unbinding the NestedTensor and yielding the shapes for
each batch element.

In the case that this isn't a NestedTensor kernel, then it just yields the original shapes.
N   z7sdpa_flop_count: expected key.shape to be 3-dimensionalz9sdpa_flop_count: expected value.shape to be 3-dimensionalzDsdpa_flop_count: grad_out.shape must match query.shape when providedz+sdpa_flop_count: cum_seq_q must not be Nonez+sdpa_flop_count: cum_seq_k must not be NonezAsdpa_flop_count: cum_seq_q and cum_seq_k must have the same shapeTstrictr   lenr   rE   r   zip)querykeyvaluer   	cum_seq_q	cum_seq_kmax_qmax_k_h_qr   h_kd_kh_vr   seq_q_lengthsseq_k_lengths	seq_q_len	seq_k_lennew_query_shapenew_key_shapenew_value_shapenew_grad_out_shapes                          r   %_unpack_flash_attention_nested_shapesr   3  sp    $  syy>Q !Z[[u{{q  !\]]HNNekk$A !ghhkkiikk !NOO !NOO??ioo- !dee+I=+I=&)-t&T"Y	 #y#6OY4M #y#6O4<4Hd!=/CUUU 'U 	
++syy%++AUx~~[_
__s   E&E(c              #     #    UGb.  [        UR                  5      S:w  a  [        S5      e[        UR                  5      S:w  a  [        S5      eUb%  UR                  U R                  :w  a  [        S5      eU R                  u    pn
UR                  u    pnUR                  u    pnUc  [        S5      eUc  [        S5      eUR                  UR                  :w  a  [        S5      e[        XF5      n[        XW5      n[	        UUS	S
9 H'  u  nnSU	UU
4nSUUU4nSUUU4nUb  UOSnUUUU4v   M)     gU R                  UR                  UR                  Ub  UR                  OS4v   g7f)a+  
Given inputs to a efficient_attention_(forward|backward) kernel, this will handle behavior for
NestedTensor inputs by effectively unbinding the NestedTensor and yielding the shapes for
each batch element.

In the case that this isn't a NestedTensor kernel, then it just yields the original shapes.
N   zQ_unpack_efficient_attention_nested_shapes: expected key.shape to be 4-dimensionalzS_unpack_efficient_attention_nested_shapes: expected value.shape to be 4-dimensionalz^_unpack_efficient_attention_nested_shapes: grad_out.shape must match query.shape when providedzH_unpack_efficient_attention_nested_shapes: cu_seqlens_q must not be NonezH_unpack_efficient_attention_nested_shapes: cu_seqlens_k must not be Noneza_unpack_efficient_attention_nested_shapes: cu_seqlens_q and cu_seqlens_k must have the same shapeTr   r   r   )r   r   r   r   cu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_kr   r   r   r   r   r   r   	seqlens_q	seqlens_klen_qlen_kr   r   r   r   s                          r   )_unpack_efficient_attention_nested_shapesr   g  s    $  syy>Q !tuuu{{q  !vwwHNNekk$A   "B  C  C131313 !kll !kll!3!33  "Z [ ['C	'C		9TBLE5 #uc2OUC0M #uc2O4<4Hd!=/CUUU C 	
++syy%++AUx~~[_
__s   E)E+T)r=   c          
      D    [        U UUUUUUS9n
[        S U
 5       5      $ )r   )r   r   r   r   r   r   r   c              3   @   #    U  H  u  pp4[        XU5      v   M     g 7fr   r   .0r   r   r   r   s        r   	<genexpr>0_flash_attention_forward_flop.<locals>.<genexpr>  &      6;2KK 	<<6;   r   sum)r   r   r   r   r   r   r   r%   r&   r'   sizess              r   _flash_attention_forward_flopr     s?    " 2E  6;  r    c           
      D    [        U UUUUUUS9n
[        S U
 5       5      $ )r   )r   r   r   r   r   r   r   c              3   @   #    U  H  u  pp4[        XU5      v   M     g 7fr   r   r   s        r   r   4_efficient_attention_forward_flop.<locals>.<genexpr>  r   r   r   r   )r   r   r   biasr   r   r   r   r&   r'   r   s              r   !_efficient_attention_forward_flopr     s?    " 6!!!!E  6;  r    c                    SnUu  pVpxUu  ppUu  pnnU u  nnnnXYs=:X  a  Us=:X  a  U:X  a  O  OXjs=:X  a  Us=:X  a  U:X  a  O  OX:X  d  [        S5      eUU:X  a  X:X  a  UU:X  d  [        S5      eSnU[        XV-  Xx4XV-  X45      -  nU[        XV-  UU4XV-  UU45      -  nU[        XV-  X4XV-  UU45      -  nU[        XV-  X{4XV-  X45      -  nU[        XV-  X4XV-  X{45      -  nU$ )Nr   zFsdpa_backward_flop_count: batch/heads/dimension mismatch among tensorszJsdpa_backward_flop_count: grad_out/value/key/query shapes are incompatibler   )rw   r   r   r   r   rS   r   r   r   r   r   r   r   r   r   r   r   _b4_h4_s4_d4s                        r   sdpa_backward_flop_countr     s2   K NA#"Cc$Cc3'Cc3!s!c!)?S)?C)?szeff#:SZsczijjK 8QUC-s/@AAK 8QUC-sC/@AAK8QUC-sC/@AAK 8QUC-s/@AAK8QUC-s/@AAKr    c                    [        XX#5      $ )z(Count flops for self-attention backward.r   )rw   r   r   r   r%   r&   r'   s          r   sdpa_backward_flopr     s    
 $NXXr    c
                 F    [        UUUU UUUU	S9n[        S U 5       5      $ )N)r   r   r   r   r   r   r   r   c              3   @   #    U  H  u  pp4[        XAX#5      v   M     g 7fr   r   r   r   r   r   rw   s        r   r   1_flash_attention_backward_flop.<locals>.<genexpr>  &      CI?KK 	!iUUCIr   r   )r   r   r   r   out	logsumexpr   r   r   r   r&   r'   shapess                r   _flash_attention_backward_flopr     sB    " 3	F  CI  r    c
                 F    [        UUUU UUUU	S9n[        S U 5       5      $ )N)r   r   r   r   r   r   r   r   c              3   @   #    U  H  u  pp4[        XAX#5      v   M     g 7fr   r   r   s        r   r   5_efficient_attention_backward_flop.<locals>.<genexpr>>  r   r   r   )r   r   r   r   r   r   r   r   r   r   r&   r'   r   s                r   "_efficient_attention_backward_flopr   #  sB    " 7!!!!	F  CI  r    c                 6    [        U [        5      (       d  U 4$ U $ r   )r   tuple)xs    r   normalize_tupler   \  s    atHr    ) KMBTc                     [        S[        [        [        5      S-
  [        [	        U 5      5      S-
  S-  5      5      n[        U   $ )Nr   r   rC   r   )maxminr   suffixesstr)numberindexs     r   get_suffix_strr  e  s=     3s8}q(3s6{+;a+?A*EFGEE?r    c                 X    [         R                  U5      nU SU-  -  S nU[         U   -   $ )Ni  z.3f)r   r  )r   suffixr  r   s       r   convert_num_with_suffixr  l  s2    NN6"E%c*E8E?""r    c                     US:X  a  gX-  S $ )Nr   0%z.2% )numdenoms     r   convert_to_percent_strr  s  s    zk#r    c                 0   ^  [        T 5      U 4S j5       nU$ )Nc                 >   > [        U 5      u  pT" U6 n[        X25      $ r   )r   r   )r&   	flat_argsspecr   r(   s       r   r)   )_pytreeify_preserve_structure.<locals>.nfy  s#    &t,	mc((r    r   r+   s   ` r   _pytreeify_preserve_structurer  x  s     
1X) )
 Ir    c                     ^  \ rS rSrSr    SS\R                  R                  \\R                  R                     -  S-  S\	S\
S\\\4   S-  SS4
U 4S	 jjjrS\	4S
 jrS\\\\\	4   4   4S jrSS jrS rS rS rSrU =r$ )r   i  a  
``FlopCounterMode`` is a context manager that counts the number of flops within its context.

It does this using a ``TorchDispatchMode``.

It also supports hierarchical output by passing a module (or list of
modules) to FlopCounterMode on construction. If you do not need hierarchical
output, you do not need to use it with a module.

Example usage

.. code-block:: python

    mod = ...
    with FlopCounterMode(mod) as flop_counter:
        mod.sum().backward()

Nmodsdepthdisplaycustom_mappingr-   c                 n  > [         TU ]  5         [        S 5      U l        X l        X0l        S U l        Uc  0 nUb  [        R                  " SSS9  0 [        EUR                  5        VVs0 s H%  u  pVU[        USS5      (       a  UO
[        U5      _M'     snnEU l	        [        5       U l        g s  snnf )Nc                       [        [        5      $ r   )r   intr  r    r   <lambda>*FlopCounterMode.__init__.<locals>.<lambda>  s
    +VYJZr    z<mods argument is not needed anymore, you can stop passing itrC   )
stacklevel_get_rawF)super__init__r   flop_countsr  r  modewarningswarnr!   itemsgetattrr,   r   mod_tracker)selfr  r  r  r  rI   v	__class__s          r   r  FlopCounterMode.__init__  s     	6ABZ6[
-1	!NMMXefg

WeWkWkWmnWmtqqwq*e44!-:JJWmn
 )? os   +,B1c                 N    [        U R                  S   R                  5       5      $ )NGlobal)r   r   valuesr'  s    r   get_total_flopsFlopCounterMode.get_total_flops  s!    4##H-44677r    c                     U R                   R                  5        VVs0 s H  u  pU[        U5      _M     snn$ s  snnf )zReturn the flop counts as a dictionary of dictionaries.

The outer
dictionary is keyed by module name, and the inner dictionary is keyed by
operation name.

Returns:
    Dict[str, Dict[Any, int]]: The flop counts as a dictionary.
)r   r$  dict)r'  rI   r(  s      r   get_flop_countsFlopCounterMode.get_flop_counts  s7     (,'7'7'='='?@'?tq47
'?@@@s   :c                 (  ^ ^
^^ Uc  T R                   nUc  SnSS KnSUl        / SQn/ nT R                  5       m
[	        T
5      mSmU
UUU 4S jn[        T R                  R                  5       5       HB  nUS:X  a  M  UR                  S5      S	-   nXq:  a  M&  U" XgS	-
  5      nUR                  U5        MD     ST R                  ;   a'  T(       d   U H  n	S
U	S   -   U	S'   M     U" SS5      U-   n[        U5      S:X  a  / SQ/nUR                  XCSS9$ )Ni?B r   T)ModuleFLOPz% TotalFc           	        > [        T
R                  U    R                  5       5      nT	UT:  -  m	SU-  n/ nUR                  X0-   [	        UT5      [        UT5      /5        T
R                  U    R                  5        H<  u  pVUR                  US-   [        U5      -   [	        UT5      [        UT5      /5        M>     U$ )N z - )r   r   r-  appendr  r  r$  r   )mod_namer  r   paddingr-  rI   r(  global_flopsglobal_suffixis_global_subsumedr'  s          r   process_mod.FlopCounterMode.get_table.<locals>.process_mod  s     d..x8??ABK+"==EkGFMM"']C&{LA 
 ((288:eOc!f,+A}=*1l;  ; Mr    r,  .r   r9  )r,  0r  )leftrightrE  )headerscolalign)r  tabulatePRESERVE_WHITESPACEr/  r  sortedr   keyscountextendr   )r'  r  rH  headerr-  r@  mod	mod_depth
cur_valuesr   r=  r>  r?  s   `         @@@r   	get_tableFlopCounterMode.get_table  s%   =JJE=E 	'+$.++-&|4"	 	, $**//12Ch		#*I $Sa-8JMM*% 3 t'''0Bq>a   !1-6Fv;!+,F  B\ ]]r    c                     U R                   R                  5         U R                  R                  5         [	        U 5      U l        U R
                  R                  5         U $ r   )r   clearr&  	__enter___FlopCounterModer!  r.  s    r   rV  FlopCounterMode.__enter__  sG     ""$$T*			r    c                    U R                   c  [        S5      eU R                   R                  " U6 nS U l         U R                  R                  5         U R                  (       a$  [        U R                  U R                  5      5        U$ )Nz<Internal error: FlopCounter.__exit__ called but mode is None)r!  rE   __exit__r&  r  printrR  r  )r'  r&   rS   s      r   rZ  FlopCounterMode.__exit__   sf    99 !_``II%	!!#<<$..,-r    c                     XR                   ;   a[  U R                   U   nU" U0 UDSU0D6n[        U R                  R                  5       H  nU R                  U   U==   U-  ss'   M     U$ )Nr#   )r!   setr&  parentsr   )r'  func_packetr   r&   r'   flop_count_funcr{   pars           r   _count_flopsFlopCounterMode._count_flops
  so    ,,,"00=O($F&F#FJ4++334  %k2j@2 5 
r    )r  r  r   r!   r&  r!  )NrC   TNr   )__name__
__module____qualname____firstlineno____doc__r   nnr6  rt   r  boolr2  r   r  r/  r   r3  rR  rV  rZ  rc  __static_attributes____classcell__)r)  s   @r   r   r     s    * DH 48+((//D$99D@+ + 	+
 !cNT1+
 >B+ +*8 8
Ac4S>&9!: 
A<^~ r    c                   @    \ rS rSrSrS\SS4S jrS rS rSS	 jr	S
r
g)rW  i  Tcounterr-   Nc                     Xl         g r   ro  )r'  ro  s     r   r  _FlopCounterMode.__init__  s    r    c                    SSK nUR                  U R                  R                  5      nU    U" U6 nSSS5        UR                  U R                  R                  5      nX@R                  l        WU4$ ! , (       d  f       NG= f)a]  Execute a branch function and capture its FLOP counts without
affecting self.counter.flop_counts

Args:
    branch_fn: The branch function to execute
    operands: Arguments to pass to the branch function

Returns:
    Tuple of (result, flop_counts) where result is the branch output
    and flop_counts is a copy of the FLOP counts after execution
r   N)copyro  r   )r'  	branch_fnoperandsrt  checkpointed_flop_countsresultr   s          r   $_execute_with_isolated_flop_counting5_FlopCounterMode._execute_with_isolated_flop_counting  sg     	#'99T\\-E-E#F )F ii 8 89#; {""	 Ts   A33
Bc                 >   U[         R                  R                  R                  La  [        $ U[         R                  R                  R                  L GaH  Uu  pVpxU R                  Xh5      u  pU	[        L a  [        $ U R                  Xx5      u  pU[        L a  [        $ [        U
R                  5       5      [        UR                  5       5      -  n0 nU H  nX   nX   n0 n[        UR                  5       5      [        UR                  5       5      -  nU H6  nUR                  US5      nUR                  US5      n[        UU5      UU'   M8     UX'   M     UR                  5        H.  u  nnU R                  R                  U   R                  U5        M0     U	$ g )Nr   )r   opshigher_ordercondNotImplementedry  r^  rK  getr   r$  ro  r   update)r'  functypesr&   r'   predtrue_branchfalse_branchrv  true_outtrue_flop_counts	false_outfalse_flop_countsall_mod_keysmerged_flop_counts	outer_keytrue_func_countsfalse_func_countsmerged_func_countsall_func_keysfunc_keytrue_val	false_val
inner_dicts                           r   _handle_higher_order_ops)_FlopCounterMode._handle_higher_order_ops-  s   uyy--222!! 599))...8<5D|)-)R)R*&H >)%%+/+T+T,(I N*%% /4467#>O>T>T>V:WWL!#)	#3#> $5$@!%'" #$4$9$9$; <sCTCYCYC[?\ \ -H/33Ha@H 1 5 5h BI36x3K&x0 !.
 1C"- * *<)A)A)C%	:((3:::F *D
 OO /r    c                 d   U(       a  UO0 nU[         R                  R                  R                  R                  [         R                  R                  R
                  R                  [         R                  R                  R
                  R                  [         R                  R                  R                  R                  [         R                  R                  R                  R                  [         R                  R                  R                  R                  [         R                  R                  R                  R                  [         R                  R                  R                  R                  [         R                  R                  R                  R                  [         R                  R                  R                  R                  [         R                  R                  R                  R                  [         R                  R                  R                  R                  [         R                  R                  R                   R                  [         R                  R                  R"                  R                  [         R                  R$                  R&                  R                  1;   a  [(        $ [+        U[         R,                  R.                  5      (       a  U R1                  XX45      $ XR2                  R4                  ;  ac  U[         R                  R$                  R6                  R                  La2  U    UR8                  " U0 UD6nU[(        La  UsS S S 5        $  S S S 5        U" U0 UD6nU R2                  R;                  UR<                  XcU5      $ ! , (       d  f       N== fr   )r   r|  atensym_is_contiguousdefaultis_contiguousmemory_formatis_strides_like_formatis_non_overlapping_and_denser   sym_sizestride
sym_stridestorage_offsetsym_storage_offsetnumel	sym_numeldimprimlayoutr  r   r2   HigherOrderOperatorr  ro  r!   r   	decomposerc  _overloadpacket)r'  r  r  r&   r'   rr   s          r   __torch_dispatch__#_FlopCounterMode.__torch_dispatch__^  s9   !r EIINN44<<IINN0088IINN00>>IINN99AAIINN??GGIINN''//IINN++33IINN))11IINN--55IINN1199IINN55==IINN((00IINN,,44IINN&&..IINN))113 3  "!dEJJ::;;00dKK ||111d%))..BWBWB_B_6_NND3F3N* *  D#F#||(()=)=s&QQ s   N!!
N/rq  )r  N)re  rf  rg  rh  supports_higher_order_operatorsr   r  ry  r  r  rl  r  r    r   rW  rW    s,    &*# D #(/b"Rr    rW  )Fr   )NNNFN)Xr   torch.utils._pytreer   r   r   module_trackerr   typingr   r	   collections.abcr
   r   typing_extensionsr   collectionsr   torch.utils._python_dispatchr   mathr   	functoolsr   r"  __all__r   r   r|  r  r   r!   r2  __annotations__r,   r   mmr  rL   addmmrQ   bmmrV   baddbmmrX   
_scaled_mmr`   rt   rk  rj   convolution_convolutioncudnn_convolution_slow_conv2d_forwardconvolution_overrideablerq   convolution_backwardr~   r   '_scaled_dot_product_efficient_attention#_scaled_dot_product_flash_attention#_scaled_dot_product_cudnn_attentionr   r   r   r   r   _flash_attention_forwardr   _efficient_attention_forwardr   r   0_scaled_dot_product_efficient_attention_backward,_scaled_dot_product_flash_attention_backward,_scaled_dot_product_cudnn_attention_backwardr   _flash_attention_backwardr   _efficient_attention_backwardr   r   r   r  r  r   r  r  r   rW  r  r    r   <module>r     s    F F )  $ $ ' # :   5
6T]t_yy~~
 !#tCH~ "XxB?O>PRZ[]_a[aRb>b5c , tww/3 	# 	  	 tzz"%# % #% txx C  ! t||$&C & %& t' % 	% (%( 	$#Y$#Y$ Cy$ 	$
 	$L (())..1155	7 8
 cg Oux O8
O t001e e 2eN& DD@@@@B C EI @WZ @C@	-" 1` eE#s(OU38_eCHouSRUXY]G]]^_1`r 4` eE#s(OU38_eCHouSRUXY]G]]^_4`n t44dC  	 D> t88$G 	 H>: MMIIIIK L ^b Yps YLY t55tD 	 E@ t994H 	 I@GGWJJ
 	HHh 	LL,	
 	OO_ 	i 	y 	I 	!!9 	y 	1 	00) 	,,i 	,,i 	99;M  	557I!" 	557I#$ 	!!#@%%'H""$B&&(J+0 $# #  
O ObmR( mRr    