
    i?(                        S r SSKrSSKJrJrJrJrJr  SSKrSSK	J
r
Jr  SSKJr  SSKJrJr  \R"                  r " S S\5      rS	\S
\S\S\\\4   4S jrS\R"                  S\\S4   4S jr   S+S	\S\S
\S\S\R,                  S\\   S\\   S\S\4S jjrS\R"                  S\\   S\S\S\S\S\R"                  4S jrS \R"                  S\4S! jrS \R"                  S\4S" jrS \R"                  S\4S# jrS \R"                  S\4S$ jrS%\\\\\4   \\   4   S\4S& jrS'\\\\\\4   \\   4      S\\\4   4S( jr S)\\\4   SS4S* jr!g),z;
Defines an nn module designed to be used during inference
    N)List
NamedTupleOptionalTupleUnion)is_row_majorpad_tensor_for_matmul)FP8Granularity)is_MI300is_sm_at_least_89c                   D    \ rS rSr% SrSr\\S'   Sr\\S'   Sr	\\S'   Sr
g)	Float8MMConfig   a{  
Configuration for the scaled_mm in the forward and backward pass.

Attributes:
    emulate (bool): Whether to emulate the matmuls in fp32.
    use_fast_accum (bool): Whether to use the fast-accumulation option for scaled_mm.
    pad_inner_dim (bool): Whether to pad the inner dimension of a and b with 0s.
                          This is needed for matmuls not aligned to 16.
Femulateuse_fast_accumpad_inner_dim N)__name__
__module____qualname____firstlineno____doc__r   bool__annotations__r   r   __static_attributes__r       W/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torchao/float8/inference.pyr   r      s'     GT ND M4r   r   a_datab_datascaled_mm_configreturnc                    UR                   (       ac  U R                  S5      UR                  S5      :X  d+   SU R                  S5       SUR                  S5       35       e[        U SS9n [        USS9n[        U R	                  5       5      (       d  U R                  5       n [        UR	                  5       5      (       a,  UR                  5       R                  5       R                  5       nX4$ )zPreprocess the inner fp8 data tensors for admmm
Args:
    a_data: Input tensor A.
    b_data: Input tensor B.
    scaled_mm_config: Configuration for _scaled_mm.
Returns:
    Preprocessed tensors A and B in the format for _scaled_mm.
   r   z"Inner dims must match for mm, got z and )dims)r   sizer	   r   stride
contiguoust)r   r   r    s      r   preprocess_datar)   )   s     %%{{1~Q/ 	
0Q0@fkkRSnEUV	
/ 'vA6&vA6((""$FMMO$$&&(**,>r   input_scaleinput_shape.c                     U R                  5       S:X  a  U R                  SS5      $ U R                  S5      n U R                  5       S:  a  U R                  SU R                  S   5      n U $ )z:Ensures input tensor is correctly formatted for _scaled_mmr#      )numelreshape	unsqueezedimshape)r*   r+   s     r   preprocess_scaler4   C   sn     a""1a(( ''+K 1!))"k.?.?.CDr   a_scaleb_scaleoutput_dtypeoutput_scalebiasr   c                     U[         R                  :X  a!  Ub  [         R                  " U UUUUUUS9nX-   $ [         R                  " U UUUUUUUS9$ )z
This is the unwrapped version of addmm_float8, which does not take in Float8TrainingTensors
as inputs. This is used to standardize the logic between subclassed and non subclassed
versions of the linear module.
)scale_ascale_bscale_result	out_dtyper   )r;   r<   r9   r=   r>   r   )torchfloat32
_scaled_mm)	r   r5   r   r6   r7   r8   r9   r   outputs	            r    addmm_float8_unwrapped_inferencerC   U   so      u}}$)9!!%")
 }!%	 	r   scale
data_shaper2   startendstepc                   ^ ^ [         R                  R                  nT R                  T:X  a  UR                  R                  T X#XE5      $ [        UU 4S j[        [        T5      5       5       5      nU[        U5      :  a  T $ Xr   nUS:X  a  UR                  R                  T X#XE5      $ Ub  X8-  OSn	Ub
  XH-   S-
  U-  OSn
US:  a  [        S5      eUR                  R                  T X)U
S5      $ )z
Slice the scale tensor appropriately based on the data tensor slicing.
This function calculates how the scale should be sliced when the data tensor
is sliced along a given dimension, taking into account the block structure.
c              3   N   >#    U  H  nTU   TR                   U   -  v   M     g 7f)N)r3   ).0irE   rD   s     r   	<genexpr>-_slice_scale_for_dimension.<locals>.<genexpr>   s$     XAWA
1Q7AWs   "%r#   Nz;Slicing with step > 1 is not implemented for scale tensors.)
r?   opsatenr3   sliceTensortuplerangelenNotImplementedError)rD   rE   r2   rF   rG   rH   rP   block_sizesblock_size_for_dimscale_start	scale_ends   ``         r   _slice_scale_for_dimensionr[   }   s     99>>D {{j zz  C>> Xs:AWXXK
c+$)Q zz  C>> 6;5Fe1D  %).@@ 	 !8%M  zz  )QGGr   xc                     [        U S5      (       d   S5       e[        U R                  5      SU R                  5       S-
  -  U R                  S   4-   :H  $ )rChecks if a quantized tensor is rowwise scaled
Args:
    x: quantized tensor (should have `block_size` attribute)

block_size.Expecting input to have `block_size` attribute)r#   r#   r-   )hasattrrS   r_   r2   r3   r\   s    r   _is_rowwise_scaledrc      sO    
 1l##U%UU#$!%%'A+"6!''"+"GGGr   c                    ^  [        T S5      (       d   S5       e[        U 4S j[        T R                  5       5       5      $ )r^   r_   r`   c              3      >#    U  H=  nTR                   U   S :H  =(       d    TR                   U   TR                  U   :H  v   M?     g7f)r-   N)r_   r3   )rK   rL   r\   s     r   rM   (_is_tensorwise_scaled.<locals>.<genexpr>   s>      HU1Q2>aAGGAJ!>>s   AA)ra   allrT   ndimrb   s   `r   _is_tensorwise_scaledri      s@    
 1l##U%UU# HMaff  r   c                     [        U S5      (       d   S5       eU R                  n[        U5      S:  =(       a+    [        R                  " USS 5      S:H  =(       a    US   S:H  $ )zChecks if a quantized tensor is scaled with a block size of 1x128
Args:
    x: quantized tensor (should have `block_size` attribute)
r_   r`   r.   Nr-   r#      )ra   r_   rU   mathprodr\   bs     r   _is_1_128_scaledrp      sZ    
 1l##U%UU#	Aq6Q;B499QsV,1BaeslBr   c                     [        U S5      (       d   S5       eU R                  n[        U5      S:H  =(       a    US   S:H  =(       a    US   S:H  $ )zChecks if a quantized tensor is scaled with a block size of 128x128
Args:
    x: quantized tensor (should have `block_size` attribute)
r_   r`   r.   r   rk   r#   )ra   r_   rU   rn   s     r   _is_128_128_scaledrr      sN    
 1l##U%UU#	Aq6Q;61Q43;61Q43;6r   gc                     SSK Jn  [        U 5      S:H  =(       a'    U S   U" SS/5      :H  =(       a    U S   U" SS/5      :H  $ )Nr   )PerBlockr.   r#   rk   ) torchao.quantization.granularityru   rU   )rs   ru   s     r   !_granularity_is_a_1_128_w_128_128rw      sF     q6Q;V1Q48QH#55V!A$(CQT:BV:VVr   granularityc                 F   SSK JnJn  S nU c  U" 5       U" 5       4nU$ [        XU45      (       a  X 4nU$ [        U [        [
        45      (       a  [        U 5      S:X  a  [        U S   U5      =(       a    [        U S   U5      n[        U S   U5      =(       a    [        U S   U5      n[        U 5      nU(       d  U(       d  U(       d  [        SU  S35      e[        U S   [        U S   5      5      (       d  [        SU  S35      e[	        U 5      nU$ [        SU  S35      e)	Nr   PerRow	PerTensorr.   r#   zUnsupported granularity types: .zEDifferent granularities for activation and weight are not supported: z#Invalid granularity specification: )
rv   r{   r|   
isinstancerS   listrU   rw   
ValueErrortype)rx   r{   r|   processed_granularityis_per_tensor
is_per_rowis_a_1_128_w_128_128s          r   _normalize_granularityr      sH   
 !!*ik :* ! ) 
KV!4	5	5!, :& ! % 
K%	/	/C4D4I";q>9= 
*NIC
  A7 
JNF=

  AM/C>{m1MNN+a.${1~*>??WXcWddef  !&k 2 !  >{m1MNNr   granularitiesc                    SSK JnJn  [        U S   U5      =(       a    [        U S   U5      n[        U S   U5      =(       a    [        U S   U5      n[	        U 5      nU(       d  U(       an  [
        R                  R                  5       (       dJ  [
        R                  R                  5       (       a  [        5       (       d  [        5       (       d   S5       egggU(       a  [        5       (       d   S5       eg[        SU  S35      e)	a  
Validate that the hardware supports the requested granularities.

Args:
    granularities: Tuple of (activation_granularity, weight_granularity)

Raises:
    AssertionError: If hardware doesn't support the requested granularity
    ValueError: If invalid granularity type is provided
r   rz   r#   uU   Float8 dynamic quantization requires CUDA compute capability ≥8.9 or MI300+ or XPU.u[   Float8 1x128 activation and 128x128 weight scaling requires CUDA compute capability ≥8.9.zInvalid granularities r}   N)rv   r{   r|   r~   rw   r?   xpuis_availablecudar   r   r   )r   r{   r|   r   r   r   s         r   _check_hardware_supportr     s    
 }Q/; 
a)AM M!,f5 *a&;J =]K
yy%%''JJ##%%*;*=*=	
 d	
 
AK*= (
 
 "" 	
i	
" 1-BCCr   )NNF)"r   rl   typingr   r   r   r   r   r?   torchao.float8.float8_utilsr   r	   torchao.float8.typesr
   torchao.utilsr   r   rR   r   r)   intr4   dtyper   rC   r[   rc   ri   rp   rr   r   rw   r   r   r   r   r   <module>r      sh    ; ;  K /
 
 Z    % 66>	4%,, U38_ 0 &*! %%% % 	%
 ++% 6"% 6
% % %P0H<<0HS	0H 
0H 	0H
 
0H 0H \\0HfH%,, H4 HU\\ d C C C7%,, 74 7Wnn,-^	W 
W%!..01 "	
%! >>)*%!P&D78&D	&Dr   