
    iT                        S SK Jr  S SKJrJr  S SKJr  S SKJrJ	r	J
r
JrJrJrJr  S SKrS SKJr  S SKJrJr  S SKJr  S S	KJr  S S
KJrJrJr  S SKJr  S SKJ r   S SK!J"r"  S SK#J$r$  SSK%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2  \&r3/ SQr4\ " S S\5      5       r5\5Rl                  S4\&\" \/\Rn                  \Rp                  S9S4\5Rr                  S4\)\" \/\Rn                  \Rt                  S9S4\5Rv                  S4\'\" \/\Rn                  \Rx                  S9S4\5Rz                  S4\'\" \/\Rn                  \Rx                  S9\" \.\Rn                  \Rx                  S94\5R|                  S4\,\" \/5      S4\5R~                  S4\+\" \/\R                  \Rx                  S9S4\5Rv                  S4\(\" \1\Rn                  \Rx                  S9S4\5Rz                  S4\(\" \1\Rn                  \Rx                  S9\" \0\Rn                  \Rx                  S94\5R|                  S4\-\" \15      S40	rA\ " S S5      5       rB " S S\$5      rCS rDS rEg)    )	dataclass)IntEnumunique)partial)CallableDictListOptionalSequenceSetTupleN)QnnPassManager)constraints_loaderget_backend_opinfo)"load_backend_rules_and_constraints)NormalizedConstraints)_soc_info_tableQcomChipsetQnnExecuTorchBackendType)
OpOverload)GraphModule)UniformQuantizationObserverBase)	Quantizer   )get_16a16w_qnn_ptq_configget_16a4w_qnn_ptq_configget_16a4w_qnn_qat_configget_16a8w_qnn_ptq_configget_16a8w_qnn_qat_configget_8a4w_qnn_ptq_configget_8a8w_qnn_ptq_configget_8a8w_qnn_qat_configget_ptq_per_block_quant_config get_ptq_per_channel_quant_configget_qat_per_block_quant_config get_qat_per_channel_quant_configQuantizationConfig)QnnQuantizer
QuantDtyper   r   r   r   r!   r"   r    r   r#   c                   0    \ rS rSrSrSrSrSrSrSr	Sr
S	rg
)r)   F   z'
bits of activation and bits of weight
r   r                N)__name__
__module____qualname____firstlineno____doc__
use_16a16w	use_16a8w	use_16a4wuse_16a4w_blockuse_8a8wuse_8a4w__static_attributes__r0       o/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/executorch/backends/qualcomm/quantizer/quantizer.pyr)   r)   F   s'     JIIOHHr=   r)   F)	act_dtypeweight_dtypeTc                       \ rS rSr% \R
                  r\\S'   Sr\	\S'   Sr
\	\S'   Sr\	\S'   Sr\\   \S'   Sr\	\S	'   Sr\\   \S
'   S rSrg)ModuleQConfig   quant_dtypeFis_qatis_conv_per_channelis_linear_per_channelNact_observeract_symmetricepsc                 l	   U R                   U R                  4[        ;  a&  [        SU R                    SU R                   S35      e[        U R                   U R                  4   u  nnnU R                  (       a&  U" U R
                  U R                  U R                  S9OU" U R
                  U R                  S9U l        Sn/ U l        [        U5       Ho  nU R                  R                  U R                  (       a'  U" U R
                  U R                  UU R                  S9OU" U R
                  XPR                  S95        Mq     [        R                  R                  R                  R                  S	[        R                  R                  R                   R                  S	[        R                  R                  R"                  R                  S	[        R                  R                  R$                  R&                  S
[        R                  R                  R(                  R&                  S
[        R                  R                  R*                  R                  S	0U l        0 U l        U R0                  (       Ga-  [        R                  R                  R                  R                  [        R                  R                  R                   R                  [        R                  R                  R"                  R                  [        R                  R                  R$                  R&                  [        R                  R                  R(                  R&                  /nU R.                  R3                  U Vs0 s H#  owU R,                  ;   d  M  XpR,                  U   _M%     sn5        U R4                  (       ay  [        R                  R                  R*                  R                  /nU R.                  R3                  U Vs0 s H#  owU R,                  ;   d  M  XpR,                  U   _M%     sn5        U(       aq  / U l        [        U5       HZ  nU R6                  R                  U R                  (       a  U" U R
                  U R                  US9OU" U R
                  US95        M\     g g s  snf s  snf )Nz the quant config, (quant_dtype: z
, is_qat: z) is not support)rI   rH   rJ   )rI   rJ   r,   )rI   rH   ch_axisrJ   )rI   rL   rJ   r   r   )rI   rH   rL   )rI   rL   )rD   rE   QUANT_CONFIG_DICTRuntimeErrorrH   rI   rJ   quant_configper_channel_quant_config_listrangeappendtorchopsatenconv1ddefaultconv2dconv3dconv_transpose2dinputconv_transpose3dlinearop_axis_dict use_per_channel_weight_quant_opsrF   updaterG   per_block_quant_config_list)	selfquant_config_funcper_channel_quant_config_funcper_block_quant_config_funcpotential_axisiconv_opsk
linear_opss	            r>   __post_init__ModuleQConfig.__post_init__   s   dkk*2CC243C3C2DJt{{m[kl  t//=>		
)'    "00!..HH #1C1CR 	 -/*~&A..55 (( 2&*&8&8%)%6%6 ! HH	 7&*&8&8! '$ IINN!!))1IINN!!))1IINN!!))1IINN++111IINN++111IINN!!))1
 13-###		%%--		%%--		%%--		//55		//55H 11882:U(Q4CTCT>T(%%a(((U %%))..//778J11882<W*QTEVEV@V(%%a((*W '/1D,>*0077  ,, 4*.*<*<)-):):$% 9*.*<*<a + ' V
 Xs   8R,R,R1R1)r^   ra   rP   rO   r_   )r1   r2   r3   r4   r)   r:   rD   __annotations__rE   boolrF   rG   rH   r
   r   rI   rJ   floatrk   r<   r0   r=   r>   rB   rB      s`    (11K1FD %%"'4'>BL(:;BM4C%Sr=   rB   c                   V  ^  \ rS rSrSr\R                  \R                  S4S\S\S\	4U 4S jjjr
\S 5       rS	\S
\4S jrS	\S
\4S jrS	\S
S4S jrS\S
S4S jrS\S
S4S jrS\R(                  R*                  S
\\   4S jrS\R(                  R*                  S
\\\      4S jrS\R(                  R*                  4S jrS\\   S
S4S jrS\\    S
S4S jr!S\\"   S
S4S jr#S
\$\"   4S jr%S\&\ \'4   S
S4S jr(      S%S\)S
S4S  jjr*S! r+S"\\'\\,4      S
S4S# jr-S$r.U =r/$ )&r(   i  a  
QnnQuantizer is a quantization annotator designed for QNN backends.
It utilizes the rules_map found in the respective {backend}_rules.py file,
which is a dictionary that links OpOverload to both annotator and validation functions.
This mapping guides how each node is annotated and validated for quantization.

During validation, the backend_opinfo pybind library containing operation details
from the QNN SDK is used to verify quantization constraints and maintain backend compatibility.
This library is available with QNN SDK version 2.41 or later.
If the library is unavailable, QnnQuantizer will not validate quantization constraints for operations.

Args:
    backend: QnnQuantizer uses the backend_type to dynamically load the appropriate backend rules as needed.
    soc_model: QnnQuantizer checks each operation according to the soc_model. For example, LPBQ requires V69 or a newer version.
    strict:
      When enabled (default), the validation stage raises a ValueError if quantization constraints are not met.
      In this mode, all quantization constraints must be satisfied to fully delegate to the QNN Backend.
      When disabled, only warnings will be logged.

Example usage:
    quantizer = QnnQuantizer(
        backend=QnnExecuTorchBackendType.kHtpBackend,
        soc_model=QcomChipset.SM8750
    )
    quantizer.set_default_quant_config(
        quant_dtype=QuantDtype.use_8a8w,
        is_qat=False,
        is_conv_per_channel=True,
        is_linear_per_channel=True,
        act_observer=MovingAverageMinMaxObserver,
    )
    quantizer.set_block_size_map({"conv2d": (1, 128, 1, 1)})
    quantizer.set_submodule_qconfig_list([
        (get_submodule_type_predicate("Add"), ModuleQConfig(quant_dtype=QuantDtype.use_16a4w))
    ])
    quantizer.add_custom_quant_annotations(...)
    quantizer.add_discard_nodes([node.name to skip annotation])
    quantizer.add_discard_ops([node.target to skip annotation])

Tbackend	soc_modelstrictc                   > [         TU ]  5         X0l        [        U5      U l        [
        U   U l        [        U R                  5      u  U l        U l	        [        U R                  R                  5       5      U l        U R                  R                  5       U l        [        U R                  U5      U l        [#        5       U l        / U l        0 U l        / U l        [        5       U l        S U l        g N)super__init__rs   strrq   r   soc_infor   
_rules_map_constraint_cachesetkeyssupported_opscopy	quant_opsr   backend_opinforB   default_quant_configsubmodule_qconfig_listblock_size_mapcustom_quant_annotationsdiscard_nodes_recipe)rb   rq   rr   rs   	__class__s       r>   rw   QnnQuantizer.__init__5  s     	7|'	2 3ULL3
// /2$//2F2F2H.I*.*<*<*A*A*C 1yI$1O!  	# !<>%'*ur=   c                     U R                   $ ru   )r   rb   s    r>   recipeQnnQuantizer.recipeT  s    ||r=   modelreturnc                     U R                   (       a'  U R                   R                  XR                  5        U$ U R                  U5        U R	                  U5        U$ )a  
Annotates GraphModule during prepare_pt2e.

If a recipe is provided, it will be used to annotate the model.
Otherwise, fallback to the default annotation flow.

Args:
    model (GraphModule): The FX GraphModule to annotate.

Returns:
    GraphModule: The annotated model.
)r   annotaterz   	_annotate_annotate_custom_annotationrb   r   s     r>   r   QnnQuantizer.annotateX  sI     <<LL!!%9
  NN5!,,U3r=   c                 4    [        5       R                  U5      $ )z
Applies QNN-specific transformation before annotation during prepare_pt2e.

Args:
    model (GraphModule): The FX GraphModule to transform.

Returns:
    GraphModule: The transformed model.
)r   !transform_for_annotation_pipeliner   s     r>   transform_for_annotation%QnnQuantizer.transform_for_annotationm  s     AA%HHr=   Nc                    UR                   R                   H  nUR                  U R                  ;   a  M  U R	                  U5      nU(       d  M9  U R
                  UR                     R                  X#U R                  5      nU R                  (       d  M  U(       a  M  [        SUR                   SUR                   35      e   g )NzValidation failed for node z with target )graphnodesnamer   !_get_normalized_quant_constraintsrz   targetvalidate_fnry   rs   
ValueError)rb   r   nodenormalized_constraints_listvalids        r>   validateQnnQuantizer.validatey  s    KK%%DyyD...*.*P*PQU*V'**4@@t}} ;;;uu$5dii[dkk][  &r=   gmc                     UR                   R                   Ha  nUR                  U R                  ;   a  M  U R	                  U5      nU(       d  M9  U R
                  UR                     R                  X#5        Mc     g)z
Annotates the nodes of the provided GraphModule in-place based on user defined quant configs during prepare_pt2e.

For each node in the graph, nodes without quant config or those explicitly listed in `self.discard_nodes` are not annotated.
N)r   r   r   r   _get_quant_configrz   r   annotate_fn)rb   r   r   rO   s       r>   r   QnnQuantizer._annotate  s[     HHNNDyyD...11$7L|,88L #r=   c                 :    U R                    H  nU" U5        M     g ru   r   )rb   r   annotation_funcs      r>   r   (QnnQuantizer._annotate_custom_annotation  s    #<<OB  =r=   r   c                 x   UR                   n[        U[        5      (       a  gU R                  U5      nU R                  R                  UR                  5      =n(       aa  UR                  R                  UR                   S5      n[        UR                  5      U:  d   SU S35       eUR                  U   nXCl
        U$ X#R                  ;   aB  UR                  U   n[        UR                  5      U:  d   SU S35       eUR                  U   $ X R                  ;   a  UR                  $ [        SU 35        g)ag  
Select the quant config for a node based on priority.

Priority order:
    1. Per-block quant config if block_size is set for node.
    2. Submodule-specific config if predicate matches.
    3. Per-channel config if op is in per-channel set.
    4. Default quant config if op is supported.

Args:
    node (torch.fx.Node): The node to get quant config for.

Nr   z)Unsupported per block quantization axis: z:, please increase the range of per_block_quant_config_listz+Unsupported per channel quantization axis: z<, please increase the range of per_channel_quant_config_listz'No quant config is implemented for op, )r   
isinstancerx   _get_submodule_qconfigr   getr   r^   lenra   
block_sizer_   rP   r   rO   print)rb   r   opconfigr   rL   s         r>   r   QnnQuantizer._get_quant_config  s<    [[b#,,T2,,00;;:;))--dkk1=GF667'A:7)C}~A77@F *M888==bAGF889GCC<WI  FB  CCC77@@&&&7t<=r=   c                    UR                   n[        U[        5      (       a  g S nX R                  ;   a  U R                  R                  U5      R                  =nU R                  R                  5       ;   aO  U R                  R                  U5      nUc1  [        U R                  U5      nU R                  R                  XC5        U$ ru   )r   r   rx   r   rz   r   qnn_opr   get_all_supported_opsr{   r   put)rb   r   r   r   r   s        r>   r   .QnnQuantizer._get_normalized_quant_constraints  s     [[b#&*#.. ??..r2999""88:; +/*@*@*D*DV*L'*2.@''/+ &&**6O**r=   c                 f    U R                    H  u  p#U" U5      (       d  M  Us  $    U R                  $ )a  
Retrieves the `ModuleQConfig` for a given node by matching the first applicable callable function in the `submodule_qconfig_list`.
You can add submodule-specific quant config using the `set_submodule_qconfig_list` method.

Args:
    node (torch.fx.Node): The node for which to retrieve the quant config.

Returns:
    ModuleQConfig: The matched submodule config, or the default config if no match is found.
)r   r   )rb   r   funcqconfigs       r>   r   #QnnQuantizer._get_submodule_qconfig  s2     "88MDDzz 9 (((r=   r   c                     Xl         g)z
Add custom annotation functions to be applied during prepare_pt2e.

Args:
    custom_quant_annotations (Sequence[Callable]): A sequence of functions that take a GraphModule and perform custom annotation.
Nr   )rb   r   s     r>   add_custom_quant_annotations)QnnQuantizer.add_custom_quant_annotations  s     )A%r=   r   c                 $    [        U5      U l        g)z2
Specifies node IDs to exclude from quantization.
N)r|   r   )rb   r   s     r>   add_discard_nodesQnnQuantizer.add_discard_nodes  s     !Zr=   rT   c                 L    U H  nU R                   R                  U5        M      g)z5
Specifies OpOverloads to exclude from quantization.
N)r   remove)rb   rT   r   s      r>   add_discard_opsQnnQuantizer.add_discard_ops  s      BNN!!"% r=   c                     U R                   $ )zj
Returns the set of supported OpOverloads for quantization.

Returns:
    Set[OpOverload]: Supported ops.
)r~   r   s    r>   get_supported_opsQnnQuantizer.get_supported_ops   s     !!!r=   r   c                     Xl         g)z
Set the mapping from node names to block sizes for per-block quantization.

Args:
    block_size_map (Dict[str, Tuple]): Mapping from node name to block size.
N)r   )rb   r   s     r>   set_block_size_mapQnnQuantizer.set_block_size_map	  s
     -r=   rD   c           
      ,    [        UUUUUUUS9U l        g)a  
Set the default quant config for quantizer.

Args:
    quant_dtype (QuantDtype): Specifies the quantized data type. By default, 8-bit activations and weights (8a8w) are used.
    is_qat (bool, optional): Enables Quantization-Aware Training (QAT) mode. Defaults to Post-Training Quantization (PTQ) mode.
    is_conv_per_channel (bool, optional): Enables per-channel quantization for convolution operations.
    is_linear_per_channel (bool, optional): Enables per-channel quantization for linear (fully connected) operations.
    act_observer (Optional[UniformQuantizationObserverBase], optional): Custom observer for activation quantization. If not specified, the default observer is determined by `QUANT_CONFIG_DICT`.

)rE   rF   rG   rH   rI   rJ   N)rB   r   )rb   rD   rE   rF   rG   rH   rI   rJ   s           r>   set_default_quant_config%QnnQuantizer.set_default_quant_config  s&    * %2 3"7%'%
!r=   c                 Z    Xl         U R                   R                  U R                  5        g ru   )r   initialize_default_strategy_opsr~   )rb   r   s     r>   
set_recipeQnnQuantizer.set_recipe1  s    44T5G5GHr=   r   c                     Xl         g)zv
Set specific quant config from a callback function.
If a node fits more than one callback, only apply the first one.
N)r   )rb   r   s     r>   set_submodule_qconfig_list'QnnQuantizer.set_submodule_qconfig_list5  s
     '=#r=   )r{   r   rz   rq   r   r   r   r   r   r   ry   rs   r   r~   )FFFNFN)0r1   r2   r3   r4   r5   r   kHtpBackendr   SM8750rn   rw   propertyr   r   r   r   r   r   r   rS   fxNoder
   r'   r   r	   r   r   r   r   r   r   rx   r   r   r   r   r   r   r   r   r)   r   r   rB   r   r<   __classcell__)r   s   @r>   r(   r(     s   'V -E,P,P!,!3!3	)  	 >  k k *
Ik 
Ik 
Ik d  MK MD M k  d  %>ehhmm %>AS8T %>N+HHMM+	$,-	.+,)588== ) 	A(0(:	A		A(x} ( (&8J#7 &D &"3z? "-c5j1A -d - !#

 

>I=&*5=1H+I&J=	= =r=   r(   c                    ^  U 4S jnU$ )
An example of nn_module_stack
{
    'L__self__': ('', 'executorch.backends.qualcomm.tests.models.SubModules'),
    'L__self___add': ('add', 'executorch.backends.qualcomm.tests.models.Add')
}
c                    > U R                   R                  S5      =n(       a!  UR                  5        H  u  p#TU;   d  M    g   gNnn_module_stackTF)metar   values)r   r   _	type_namemodule_type_strs       r>   	predicate/get_submodule_type_predicate.<locals>.predicateH  sA    "iimm,=>>?> / 6 6 8"i/ !9 r=   r0   )r   r   s   ` r>   get_submodule_type_predicater   ?       r=   c                    ^  U 4S jnU$ )r   c                    > U R                   R                  S5      =n(       a  UR                  5        H  nTU;   d  M    g   gr   )r   r   r}   )r   r   r   module_name_strs      r>   r   /get_submodule_name_predicate.<locals>.predicate[  s?    "iimm,=>>?>',,."d* / r=   r0   )r   r   s   ` r>   get_submodule_name_predicater   R  r   r=   )Fdataclassesr   enumr   r   	functoolsr   typingr   r   r	   r
   r   r   r   rS   5executorch.backends.qualcomm._passes.qnn_pass_managerr   =executorch.backends.qualcomm.quantizer.backend_opinfo_adapterr   r   6executorch.backends.qualcomm.quantizer.registry_loaderr   1executorch.backends.qualcomm.quantizer.validatorsr   4executorch.backends.qualcomm.serialization.qc_schemar   r   r   
torch._opsr   torch.fxr   torchao.quantization.pt2er   #torchao.quantization.pt2e.quantizerr   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'    get_default_16bit_qnn_ptq_config__all__r)   r6   uint16int16r7   int8r8   int4r9   r:   r;   uint8rM   rB   r(   r   r   r0   r=   r>   <module>r
     s   "    G G G  P
 T 
 "   E 9   $ $=   
 
 
 E"!,ll	

 	% 5! ,ll	

 	$ 5! ,ll	

 	$ ' ,ll	

 	*ll	
* % 01#
 % ,kk	

 	# 4  ,ll	

 	# & ,ll	

 	*ll	
) $01"_T n \ \ \~q=9 q=h	&r=   