
    i7              	       `   S SK r S SKJr  \" S5      (       a  SSKJr  S SKJrJr  S SKJr  S SK	J
r
Jr  S S	KJrJrJr  S S
KJrJrJrJr  SSKJr  SSKJr  SSKJr  / SQrS\S\S\4S jrS\S\S\4S jr\ R8                  R:                  R<                  R>                  \ R8                  R:                  R<                  R@                  \ R8                  R:                  RB                  R>                  \ R8                  RD                  RF                  /r$S\S\%4S jr&S r'  SS\S\%S\%S\4S jjr(g)    N)torch_version_at_least2.7.0   )constant_fold)GraphModuleNode)PassManager)_fold_conv_bn_qat_fuse_conv_bn_qat)DuplicateDQPassPortNodeMetaForQDQ	Quantizer)_disallow_eval_train_fuse_conv_bn__fuse_linear_bn__get_node_name_to_scope)#_convert_to_reference_decomposed_fx)prepare) reference_representation_rewrite)prepare_pt2eprepare_qat_pt2econvert_pt2emodel	quantizerreturnc                 X   [         R                  R                  S5        U R                  n[	        U 5      n[        U 5        [        U 5        UR                  U 5      n UR                  U 5        UR                  U 5        [        U USUR                  S9n U R                  R                  U5        [        U 5      n U R                  R                   H  nUR                   S:X  d  M  UR"                  [         R$                  R&                  R(                  L d  MH  UR*                  S   n[-        U[.        5      (       d   eUR                   S:X  d   e[-        UR"                  [0        5      (       d   eU R3                  UR"                  5      n[5        Xa5      n[7        XR"                  U5        M     U R                  R                   H  nUR                   S:X  d  M  UR"                  [         R$                  R&                  R8                  L d  MH  UR*                  S   n[-        U[.        5      (       d   eUR                   S:X  d   e[-        UR"                  [0        5      (       d   eU R3                  UR"                  5      n	[5        X5      n
[7        XR"                  U
5        M     U $ )a  Prepare a model for post training quantization

Args:
  * `model` (torch.fx.GraphModule): a model captured by `torch.export.export` API.
  * `quantizer`: A backend specific quantizer that conveys how user want the
    model to be quantized. Tutorial for how to write a quantizer can be found here:
    https://pytorch.org/tutorials/prototype/pt2e_quantizer.html

Return:
  A GraphModule with observer (based on quantizer annotation), ready for calibration

Example::

    import torch
    from torchao.quantization.pt2e.quantize_pt2e import prepare_pt2e
    from torchao.quantization.pt2e.quantizer import (
        XNNPACKQuantizer,
        get_symmetric_quantization_config,
    )

    class M(torch.nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.linear = torch.nn.Linear(5, 10)

       def forward(self, x):
           return self.linear(x)

    # initialize a floating point model
    float_model = M().eval()

    # define calibration function
    def calibrate(model, data_loader):
        model.eval()
        with torch.no_grad():
            for image, target in data_loader:
                model(image)

    # Step 1. program capture
    # NOTE: this API will be updated to torch.export API in the future, but the captured
    # result shoud mostly stay the same
    m = torch.export.export(m, *example_inputs).module()
    # we get a model with aten ops

    # Step 2. quantization
    # backend developer will write their own Quantizer and expose methods to allow
    # users to express how they
    # want the model to be quantized
    quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
    m = prepare_pt2e(m, quantizer)

    # run calibration
    # calibrate(m, sample_inference_data)
z&torchao.quantization.pt2e.prepare_pt2eFis_qatobs_or_fq_callbackcall_functionr   get_attrr   )torch_C_log_api_usage_oncemetar   r   r   transform_for_annotationannotatevalidater   prepare_obs_or_fq_callbackupdater   graphnodesoptargetopshigher_orderscanargs
isinstancer   strget_submoduler   setattr
while_loop)r   r   original_graph_metanode_name_to_scopenodescan_combine_fn_nodescan_combine_fnprepared_scan_combine_fnwhile_loop_body_fn_nodewhile_loop_body_fnprepared_while_loop_body_fns              f/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/torchao/quantization/pt2e/quantize_pt2e.pyr   r   )   s0   t 
HH  !IJ**07 5U..u5Euu$??	E 
JJ)* 'E
 !!77o%$++9O9O9T9T*T#'99Q< 2D9999'**j8882993????#112F2M2MNO'3O'O$E668PQ " !!GG&uyy55@@@&*iil#5t<<<<*--;;;5<<cBBBB!&!4!45L5S5S!T*67I*U'E99;VW " L    c                 f   [         R                  R                  S5        U R                  n[	        U 5      nUR                  U 5      n UR                  U 5        UR                  U 5        [        U 5        [        U USUR                  S9n U R                  R                  U5        [        U 5      n U $ )a  Prepare a model for quantization aware training

Args:
  * `model` (torch.fx.GraphModule): see :func:`~torchao.quantization.pt2e.quantize_pt2e.prepare_pt2e`
  * `quantizer`: see :func:`~torchao.quantization.pt2e.quantize_pt2e.prepare_pt2e`

Return:
  A GraphModule with fake quant modules (based on quantizer annotation), ready for
  quantization aware training

Example::
    import torch
    from torchao.quantization.pt2e.quantize_pt2e import prepare_qat_pt2e
    from torchao.quantization.pt2e.quantizer import (
        XNNPACKQuantizer,
        get_symmetric_quantization_config,
    )

    class M(torch.nn.Module):
        def __init__(self) -> None:
            super().__init__()
            self.linear = torch.nn.Linear(5, 10)

       def forward(self, x):
           return self.linear(x)

    # initialize a floating point model
    float_model = M().eval()

    # define the training loop for quantization aware training
    def train_loop(model, train_data):
        model.train()
        for image, target in data_loader:
            ...

    # Step 1. program capture
    # NOTE: this API will be updated to torch.export API in the future, but the captured
    # result shoud mostly stay the same
    m = torch.export.export(m, *example_inputs).module()
    # we get a model with aten ops

    # Step 2. quantization
    # backend developer will write their own Quantizer and expose methods to allow
    # users to express how they
    # want the model to be quantized
    quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
    m = prepare_qat_pt2e(m, quantizer)

    # run quantization aware training
    train_loop(prepared_model, train_loop)

z*torchao.quantization.pt2e.prepare_qat_pt2eTr   )r"   r#   r$   r%   r   r&   r'   r(   r   r   r)   r*   r   )r   r   r8   r9   s       rA   r   r      s    p 
HH  !MN**07..u5Euu e$??	E 
JJ)* 'ELrB   nc                 T    U R                   S:H  =(       a    U R                  [        ;   $ )a@  If there is any pure ops between get_attr and quantize op they will be const propagated
e.g. get_attr(weight) -> transpose -> quantize -> dequantize*
(Note: dequantize op is not going to be constant propagated)

This filter is added because we don't want to constant fold the things that are not
related to quantization
r    )r-   r.   
_QUANT_OPS)rD   s    rA   _quant_node_constraintrG      s!     44?"=qxx:'==rB   c                    SSK Jn  SSKJn  SSKJn  SnSnU R                  5        H  u  pg[        U[        R                  R                  R                  R                  5      (       dz  [        U[        R                  R                  R                  R                  5      (       d=  [        U[        R                  R                  R                  R                  5      (       a  Sn[        Xq5      (       d"  [        Xs5      (       d  [        Xr5      (       d  M  SnM     U(       a  U(       a   S5       eU(       a  U(       a   S5       eU$ )Nr   )FakeQuantize)AffineQuantizedObserverBase)ObserverBaseFTz2Cannot be prepared using both torch.ao and torchao)'torchao.quantization.pt2e.fake_quantizerI   "torchao.quantization.pt2e.observerrJ   rK   named_modulesr3   r"   aoquantizationfake_quantizeobserver)r   torchao_FakeQuantize#torchao_AffineQuantizedObserverBasetorchao_ObserverBaseis_torch_ao_preparedis_torchao_prepared_ms           rA   1_is_torchao_prepared_do_not_use_outside_this_filerZ      s    X ##%q%((//==JJKK!UXX22;;HHII!UXX22;;WWXX#' q//!22!AA"& & & 	
@	
& ' 	
@	
' rB   use_reference_representationfold_quantizec                    [         R                  R                  S5        [        U[        5      (       d  [        SU S35      eU R                  nU R                  R                   H  nUR                  S:X  d  M  UR                  [         R                  R                  R                  L d  MH  UR                  S   n[        U[        5      (       d   eUR                  S:X  d   e[        UR                  [         5      (       d   eU R#                  UR                  5      n[%        UUUS9n['        XR                  U5        M     U R                  R                   H  nUR                  S:X  d  M  UR                  [         R                  R                  R(                  L d  MH  UR                  S   n[        U[        5      (       d   eUR                  S:X  d   e[        UR                  [         5      (       d   eU R#                  UR                  5      n	[%        U	UUS9n
['        XR                  U
5        M     [+        U 5      n [-        U 5      n [/        [1        5       /5      nU" U 5      R2                  n [/        [5        5       /5      nU" U 5      R2                  n U(       a   [7        S	5      (       a  [9        U [:        5        U(       a  [=        U 5      n U R                  R?                  U5        [A        U 5      n U $ )
a  Convert a calibrated/trained model to a quantized model

Args:
  * `model` (torch.fx.GraphModule): calibrated/trained model
  * `use_reference_representation` (bool): boolean flag to indicate whether to produce referece representation or not
  * `fold_quantize` (bool): boolean flag for whether fold the quantize op or not

Returns:
    quantized model, either in q/dq representation or reference representation

Example::

    # prepared_model: the model produced by `prepare_pt2e`/`prepare_qat_pt2e` and calibration/training
    # `convert_pt2e` produces a quantized model that represents quantized computation with
    # quantize dequantize ops and fp32 ops by default.
    # Please refer to
    # https://pytorch.org/tutorials/prototype/pt2e_quant_ptq_static.html#convert-the-calibrated-model-to-a-quantized-model
    # for detailed explanation of output quantized model
    quantized_model = convert_pt2e(prepared_model)

z&torchao.quantization.pt2e.convert_pt2ezjUnexpected argument type for `use_reference_representation`, please make sure you intend to pass argument z to convert_pt2er    r   r!   )r[   r\   r   r   )!r"   r#   r$   r3   bool
ValueErrorr%   r+   r,   r-   r.   r/   r0   r1   r2   r   r4   r5   r   r6   r7   r   r
   r	   r   graph_moduler   r   r   rG   r   r*   r   )r   r[   r\   r8   r:   r;   r<   converted_scan_combine_fnr>   r?   converted_while_loop_body_fnpms               rA   r   r     s   4 
HH  !IJ2D99<<X;YYik
 	
  ** !!77o%$++9O9O9T9T*T#'99Q< 2D9999'**j8882993????#112F2M2MNO(4-I+)%
 E668QR " !!GG&uyy55@@@&*iil#5t<<<<*--;;;5<<cBBBB!&!4!45L5S5S!T+7"-I+,(
 E99;WX "  06Ee$E	o'(	)BuI""E	(*+	,BuI""E/88e34#07	JJ)* 'ELrB   )FT))r"   torchao.utilsr   r   torch.fxr   r   "torch.fx.passes.infra.pass_managerr	   #torchao.quantization.pt2e.qat_utilsr
   r   #torchao.quantization.pt2e.quantizerr   r   r   torchao.quantization.pt2e.utilsr   r   r   r   convertr   r   r   __all__r   r   r/   quantized_decomposedquantize_per_tensordefaulttensorquantize_per_channeltorchaoquantize_affinerF   r^   rG   rZ   r    rB   rA   <module>rt      sH    0'"", & : T 
  9  Nggg gTJJJ J\ 
II""66>>	II""66==	II""77??	II%%	
>d >t >"N */SS"&S S 	SrB   