
    iF&                     B   S SK JrJrJr  S SKrS SKJr  S SKJrJ	r	  S SK
Jr  \R                  R                  r\	" S5      S\R                  S\R                  S	\R                  S
\S\S\R                  4S j5       r\" S5      S\R                  S\R                  S	\R                  S
\S\S\R                  4S j5       r\	" S5      S\R                  S\R                  S	\R                  S\R                  S\R                  S\R                  S\R                  4S j5       r\" S5      S\R                  S\R                  S	\R                  S\R                  S\R                  S\R                  S\R                  4S j5       r\	" S5      S\R                  S\R                  S\R                  S	\R                  S
\S\S\R                  S\R                  4S j5       r\" S5      S\R                  S\R                  S\R                  S	\R                  S
\S\S\R                  S\R                  4S j5       r " S S\5      r\R.                  r\R0                  r\" \R2                  R4                  5      S 5       r\" \R8                  R4                  5      S 5       r\" \R<                  R                  5      S 5       r\" \R@                  RB                  5      S 5       r"\" \RF                  R4                  5      S  5       r$\" \RJ                  R4                  5      S! 5       r&\" \RN                  R4                  5      S" 5       r(\" \RR                  R4                  5      S# 5       r*\" \RV                  RX                  RZ                  5      S$ 5       r.g)%    )ListOptionalTupleN)return_and_correct_aliasing)register_custom_opregister_custom_op_impl)TorchAOBaseTensorzblocksparse::bsr_to_densecrow_indicescol_indicesvaluesMKreturnc                 L    [         R                  " XX#U4S9R                  5       $ )N)r
   r   r   size)torchsparse_bsr_tensorto_denser
   r   r   r   r   s        [/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torchao/sparsity/blocksparse.pybsr_to_denser      s)     ""!6TUPVhj    c                 X    [         R                  " X44UR                  UR                  S9$ )Ndtypedevice)r   emptyr   r   r   s        r   bsr_to_dense_abstractr      s!     ;;vV\\&--HHr   zblocksparse::int_addmmA
left_alpharight_alphac                 v   SSK Jn  SSKJn  UR                  [
        R                  :X  d   eUR                  S   nUR                  S   n	UR                  S   n
[
        R                  " XX(U	4S9nU" [        X5      nUR                  XU
4-   [
        R                  S9nU" UUUSSUUUS	9R                  5       $ )
Nr   )broadcast_batch_dimsbsr_dense_addmmr   )r      )alphabetaoutr    r!   )torch.sparse._triton_opsr#   torchao.kernel.bsr_triton_opsr%   r   r   int8shaper   blocksparse_int_addmm	new_emptybfloat16t)r
   r   r   r   r    r!   r#   r%   r   r   N
weight_bsroriginal_batch_dims_broadcastedr,   s                 r   r1   r1   *   s     >=<<5::%%%A	A	A((FUVQWXJ&:z'# ++5A>enn+
UC		 ac	
r   c                     UR                   S   nUR                   S   n[        R                  " Xv4[        R                  UR                  S9R                  5       $ )Nr&   r   )r0   r   r   r3   r   r4   )r
   r   r   r   r    r!   r5   r   s           r   blocksparse_int_addmm_abstractr9   L   sG     	
AA;;vU^^AHHEGGIIr   zblocksparse::addmmx_paddedbiasc           	          SSK Jn  Ub   e[        R                  " XX4U4S9nU R                  S   n	U R                  XI45      n
U" U
UU SSU
S9  U
$ )Nr   r$   r(   r)   )r*   r+   r,   )r.   r%   r   r   r0   r2   )r:   r
   r   r   r   r   r;   r%   bsrN_paddedr,   s              r   blocksparse_addmmr?   [   sh     ><<

!
!,Va&
QC~~a H


a]
+C Jr   c                 D    U R                   S   nU R                  XG45      $ )Nr)   )r0   r2   )r:   r
   r   r   r   r   r;   r>   s           r   blocksparse_addmm_abstractrA   v   s%     ~~a Hqm,,r   c                      \ rS rSr% \\R                     \S'   \\R                     \S'   \\R                     \S'   \\S'   / SQr	\
 SS\R                  S\S\\R                     S\\R                     S\\R                     S\4S	 jj5       rS
\4S jrS
\\\   \\R                  \\4   4   4S jr\S\\R                  \\4   S
\R                  4S j5       r\S 5       rS rSrg)BlockSparseTensor   bsr_crow_indicesbsr_col_indices
bsr_values	blocksize)rE   rF   rG   r0   requires_gradc                     Uc  [        S5      eUnUR                  UR                  UR                  US.n[        R
                  R                  " X40 UD6n	X)l        X9l        XYl	        XIl
        U	$ )NzCNo values passed to BlockSparseTensor: bsr_values must be provided!)r   r   layoutrI   )
ValueErrorr   r   rK   r   Tensor_make_wrapper_subclassrH   rE   rG   rF   )
clsr0   rH   rE   rF   rG   rI   previous_tensorkwargstensors
             r   __new__BlockSparseTensor.__new__   s     U  )O &,,$**%,,*	
 44SJ6J$"2&!0r   r   c                 t    [        U S5      (       d   eU R                  R                   SU R                   S3$ )Nr0   z(shape=))hasattr	__class____name__r0   )selfs    r   __repr__BlockSparseTensor.__repr__   s7    tW%%%%..))*'$**Q??r   c                    ^  [        [        U 4S jT R                  5      5      nT R                  T R                  T R
                  4nX4$ )Nc                     > [        TU 5      S L$ )N)getattr)xrZ   s    r   <lambda>6BlockSparseTensor.__tensor_flatten__.<locals>.<lambda>   s    WT1-T9r   )listfilter	__slots__r0   rI   rH   )rZ   inner_tensorstensor_metas   `  r   __tensor_flatten__$BlockSparseTensor.__tensor_flatten__   sA    94>>J
 zz4#5#5t~~F))r   rg   c           
          Uu  pVnU " UUUR                  SS 5      UR                  SS 5      UR                  SS 5      US9$ )NrE   rF   rG   r0   rH   rE   rF   rG   rI   )get)rO   rf   rg   
outer_sizeouter_strider0   rI   rH   s           r   __tensor_unflatten__&BlockSparseTensor.__tensor_unflatten__   sV     +6'i*../A4H)--.?F$((t<'
 	
r   c           	          UR                  U5      nU " UR                  UUR                  5       UR                  5       UR	                  5       SS9$ )NFrk   )to_sparse_bsrr0   r
   r   r   )rO   dense_tensorrH   
bsr_tensors       r   
from_denseBlockSparseTensor.from_dense   sR    !//	:
$$'446&224!((*
 	
r   c           	          [        U R                  U R                  U" U R                  5      U" U R                  5      U" U R
                  5      U R                  S9$ )Nrk   )rC   r0   rH   rE   rF   rG   rI   )rZ   funcs     r   apply_fn_to_shard#BlockSparseTensor.apply_fn_to_shard   sN     **nn!$"7"78 !5!56DOO,,,
 	
r    N)F)rY   
__module____qualname____firstlineno__r   r   rM   __annotations__intre   staticmethodSizeboolrS   strr[   r   r   rh   classmethodro   ru   ry   __static_attributes__r{   r   r   rC   rC      s6   u||,,ell++&&NEI $zz  #5<<0	
 "%,,/ U\\*  :@# @*E$s)U5::tS;P5Q*Q$R * 
 5::tS01
 

 
$ 

 


r   rC   c                 Z    [        XX2S   R                  [        R                  5      5      $ Nr   )r   ry   r   detachrx   typesargsrQ   s       r   block_sparse_detachr      s'    &FG55ellC r   c           	      t   [        U5      S:X  d   e[        U5      S:X  d   eUS   S:X  d   eUS   nUR                  5       S:X  d   eUR                  (       a   e[        UR                  S-   UR
                  UR                  5       UR                  5       UR                  5       R                  S5      SS9$ )N   r   r&   )r)   F)rI   )
lendimrI   rC   r0   rH   r
   r   r   	unsqueeze)rx   r   r   rQ   r=   s        r   block_sparse_unsqueezer      s    t9>>v;!8q==
q'C779>>    		D

r" r   c                     [        U5      S:X  d   e[        U5      S:X  d   eUu  pES n[        U[        R                  5      (       a  [        U[        5      (       a  U" XT5      $ U" XE5      $ )Nr   r   c                    [        U [        5      (       d   e[        U[        R                  5      (       d   eU R	                  5       S:X  d   eUR	                  5       S:X  d   eU R
                  (       a   eUR                  S5      S:X  d   eUR                  UR                  S5      UR                  S5      U R                  -  U R                  S5      nUR                  SS5      R                  SU R                  5       5      nU R                  5       U-  n[        U R                  U R                  U R                  5       U R                  5       U5      $ )N   r   r)   )
isinstancerC   r   rM   r   rI   r   viewrH   	transposeindex_selectr   r   r0   r
   )r=   r4   	t_blockedmasked_t
new_valuess        r   my_mul block_sparse_mul.<locals>.my_mul  s   #01111!U\\****wwyA~~uuw!||$$$$vvayA~~FF166!9affQi3==&@#--QRS	&&q!,99!S__=NOZZ\H,
 IIs}}c&6&6&8#//:KZ
 	
r   )r   r   r   rM   rC   )rx   r   r   rQ   r=   r4   r   s          r   block_sparse_mulr      sd    t9>>v;!FC
 #u||$$A7H)I)Ia~#>r   c                    Uu  pE[        U5      [        :X  d   e[        U5      S:X  d   eUS   nUS:X  d   e[        R                  R
                  R                  UR                  5       UR                  5       UR                  S   5      $ )Nr)   r   )
typerc   r   r   opsblocksparsesumr   r
   r0   )rx   r   r   rQ   r=   r   s         r   block_sparse_sumr     sv    HC9s8q==
a&C!8O899  $$SZZ\33C3C3EsyyQR|TTr   c                 <    US   R                   R                  5       $ r   )rG   r   r   s       r   block_sparse_valuesr   "  s    7$$&&r   c                 <    US   R                   R                  5       $ r   )rE   r   r   s       r   block_sparse_crow_indicesr   '  s    7##**,,r   c                 <    US   R                   R                  5       $ r   )rF   r   r   s       r   block_sparse_col_indicesr   ,  s    7""))++r   c                 :    US   R                   R                  S   $ r   )rG   r0   r   s       r   block_sparse__nnzr   1  s    7##A&&r   c           	         Uu  pEnUR                  SUR                  S5      5      R                  5       nUR                  S   nUR                  S   n	[        R
                  R                  R                  UUR                  5       UR                  5       UR                  5       UU	S 5      n
U
R                  5       nUc  U$ X-   $ )Nr&   r   r)   )reshaper   r4   r0   r   r   r   addmmr
   r   r   )rx   r   r   rQ   x_origwr;   r`   r   r   r,   out_origs               r   block_sparse_linearr   6  s    OFtr6;;r?+--/A	
A	
A
))


%
%				
		C uuwH|?r   )/typingr   r   r   r   torch.utils._python_dispatchr   torchao.opsr   r   torchao.utilsr	   r   atenrM   r   r   r   r1   r9   r?   rA   rC   
implementsimplements_torch_functionr   defaultr   r   r   mulr   r   dim_IntListr   r   r   r
   r   r   r   _nnzr   nn
functionallinearr   r{   r   r   <module>r      sV   ) (  D C +yy~~ 45	,,		 LL	 		
 	 \\	 6	 /0I,,II LLI 	I
 I \\I 1I 12
,,

 LL
 ||	

 
 
 \\
 3
B ,-J,,JJ LLJ ||	J
 J J \\J .J -.ll,,  LL	
   ,, \\ /4 ()
-ll
-,,
- 
- LL	
-
 
- 
- ,,
- \\
- *
-Y
) Y
z ))
-GG  DKK  ! DNN""# $" DHHOO 0 DHH  !U "U DKK ' !' D%%&- '- D$$%, &, DII' ' 588..556 7r   