
    `i=                        S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	  S SK
Jr  S SKJrJrJrJr  S SKJrJr  S SKJr  S SKJr  S SKJr  \ R2                  " \5      r\S	S
4\SS4\SS4\S	S
4\SS4S.r   SS\S\S\S\S\4
S jjr S\S\	\\4   4S jr!S r"S r#       SS jr$S r%S r&\S:X  a  \&" 5         gg)    N)Dict)deque)
ModelProtoTensorProtonumpy_helper
load_model)BertOnnxModelBertOptimizationOptions)BertOnnxModelTF)BertOnnxModelKeras)Gpt2OnnxModelpytorchTtf2onnxF
keras2onnx)bertbert_tf
bert_kerasgpt2gpt2_tfonnx_model_pathuse_gpuoptimized_model_path	opt_levelreturnc                    SSK nU(       a+  SUR                  5       ;  a  [        R                  S5        U $ UR	                  5       nUS:X  a  UR
                  R                  Ul        OEUS:X  a  UR
                  R                  Ul        O#US:X  d   eUR
                  R                  Ul        Uc   U SS nS	R                  XcU(       a  S
OS5      nX%l        U(       d  UR                  XS/S9nO'UR                  X5      nSUR                  5       ;   d   e[        R                  R!                  U5      (       a$  [        R                  R#                  U5      (       d   e[        R%                  SR                  U5      5        U$ )ai  
Use onnxruntime to optimize model.

Args:
    onnx_model_path (str): the path of input onnx model.
    use_gpu (bool): whether the optimized model is targeted to run in GPU.
    optimized_model_path (str or None): the path of optimized model.
    opt_level (int): graph optimization level.

Returns:
    optimized_model_path (str): the path of optimized model
r   NCUDAExecutionProviderz3There is no gpu for onnxruntime to do optimization.      c   z{}_o{}_{}.onnxgpucpuCPUExecutionProvider)	providersz)Save optimized model by onnxruntime to {})onnxruntimeget_available_providersloggererrorSessionOptionsGraphOptimizationLevelORT_ENABLE_BASICgraph_optimization_levelORT_ENABLE_EXTENDEDORT_ENABLE_ALLformatoptimized_model_filepathInferenceSessionget_providersospathexistsisfiledebug)r   r   r   r   r%   sess_optionspath_prefixsessions           g/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/onnxruntime_tools/transformers/optimizer.pyoptimize_by_onnxruntimer<   /   sR     *+2U2U2WWJK--/LA~0;0R0R0c0c-	a0;0R0R0f0f-B0;0R0R0a0a-#%cr*/66{X_uejk,@)..YoXp.q..M&'*?*?*AAAA77>>.//BGGNNCW4X4XXX
LL<CCDXYZ    c                 N    [        U SSS9n[        USSS9nUR                  5       $ )z
Get counter of fused operators in optimized model.

Args:
    optimized_model_path (str): the path of onnx model.

Returns:
    A dictionary with operator type as key, and count as value
NTr/   load_external_data      )	num_headshidden_size)r   r	   get_fused_operator_statistics)r   model	optimizers      r;   get_fusion_statisticsrH   _   s0     +DTREersCI2244r=   c                     [         R                  " 5       n U R                  SS[        SS9  U R                  SS[        SS9  U R                  SS[        R                  S	[        [        R                  5       5      S
SR                  [        R                  5       5      -   S9  U R                  SS[        SSS9  U R                  SS[        SSS9  U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SSSSS9  U R                  SS9  U R                  SSSS S9  U R                  SS!9  U R                  S"SSS#S9  U R                  SS$9  U R                  S%SSS&S9  U R                  SS'9  U R                  S(SSS)S9  U R                  SS*9  U R                  S+SSS,S9  U R                  SS-9  U R                  S.SSS/S9  U R                  SS09  U R                  S1SSS2S9  U R                  SS39  U R                  S4SSS5S9  U R                  SS69  U R                  S7SSS8S9  U R                  SS99  U R                  S:SSS;9  U R                  SS<9  U R                  S=SSS>S9  U R                  SS?9  U R                  S@SSSAS9  U R                  SSB9  U R                  SCS[        / SDQSESFSG9  U R                  SHSSSIS9  U R                  SSJ9  U R                  5       nU$ )KNz--inputTzinput onnx model path)requiredtypehelpz--outputzoptimized onnx model pathz--model_typeFr   z!Model type selected in the list: z, )rJ   rK   defaultchoicesrL   z--num_headsrA   zwnumber of attention heads. 12 for bert-base model and 16 for bert-large. For BERT, set it to 0 to detect automatically.)rJ   rK   rM   rL   z--hidden_sizerB   zwbert model hidden size. 768 for bert-base model and 1024 for bert-large. For BERT, set it to 0 to detect automatically.z--input_int32
store_truezKUse int32 (instead of int64) tensor as input to avoid unnecessary data cast)rJ   actionrL   )input_int32z	--float16zdIf your target device is V100 or T4 GPU, use this to convert float32 to float16 for best performance)float16z--disable_attentionzdisable Attention fusion)disable_attentionz--disable_skip_layer_normz%disable SkipLayerNormalization fusion)disable_skip_layer_normz--disable_embed_layer_normz&disable EmbedLayerNormalization fusion)disable_embed_layer_normz--disable_bias_skip_layer_normz2disable Add Bias and SkipLayerNormalization fusion)disable_bias_skip_layer_normz--disable_bias_geluz)disable Add Bias and Gelu/FastGelu fusion)disable_bias_geluz--disable_layer_normz!disable LayerNormalization fusion)disable_layer_normz--disable_geluzdisable Gelu fusion)disable_geluz--enable_gelu_approximationz+enable Gelu/BiasGelu to FastGelu conversion)enable_gelu_approximationz--use_mask_indexzBuse mask index instead of raw attention mask in attention operator)use_mask_indexz--no_attention_maskz1no attention mask. Only works for model_type=bert)no_attention_maskz	--verbose)rJ   rP   verbosez	--use_gpuzuse GPU inference)r   z--only_onnxruntimezoptimized by onnxruntime only)only_onnxruntimez--opt_level)r   r   r   r   r   z;onnxruntime optimization level. 0 will disable onnxruntime.)rJ   rK   rN   rM   rL   z--use_external_data_formatzuse external data format)use_external_data_format)argparseArgumentParseradd_argumentstrlowerlistMODEL_CLASSESkeysjoinintset_defaults
parse_args)parserargss     r;   _parse_argumentsro   n   s   $$&F
	DsAXY

TB]^
!& YY & $]%7%7%9 :@499]M_M_MaCbb  d  	B    	B   !&+j  l E*
s	  u
 &
-lYst
%0
3!&+D  F 6
4!&+E  G 7
8!&+Q  S U;
-!&+H  J %0
.!&+@  B 51
(5Tij
U+
5!&+J  L %8
*!&+a  c u-
-!&+P  R %0
eLI
&
eLObc
&
,u\Xwx
/
!&  - !Z  \ 4!&+7  9 7DKr=   c                 6   [        U R                  5      nU R                  (       a  SUl        U R                  (       a  SUl        U R                  (       a  SUl        U R                  (       a  SUl	        U R                  (       a  SUl        U R                  (       a  SUl        U R                  (       a  SUl        U R                   (       a  SUl        U R"                  (       a  UR%                  S5        U R&                  (       a  UR)                  5         U$ )NFT)r
   
model_typerY   enable_gelurX   enable_layer_normrS   enable_attentionrT   enable_skip_layer_normrU   enable_embed_layer_normrV   enable_bias_skip_layer_normrW   enable_bias_gelurZ   r[   use_raw_attention_maskr\   disable_attention_mask)rn   optimization_optionss     r;   _get_optimization_optionsr|      s    24??C+0(16.05-##6;3$$7<4((;@805-%%9=633E:335r=   c           	      d   [         U   u  pn
SnUS:  a  [        XUS9nOU
(       a  [        U SSS9n[        U=(       d    U SSS9nUR                  (       aB  XR                  :w  a3  [        R                  SU	 SUR                   S	UR                   S
35        Uc  [        U5      nU" XU5      nU(       d  UR                  U5        U(       a:  [        R                  " U5        [        R                  SR                  U5      5        SUR                  l        SSKJn  XR                  l        U$ )a  Optimize Model by OnnxRuntime and/or offline fusion logic.

The following optimizes model by OnnxRuntime only, and no offline fusion logic:
    optimize_model(input, opt_level=1, use_gpu=False, only_onnxruntime=True)
If you want to optimize model by offline fusion logic.
    optimize_model(input, model_type, num_heads=12, hidden_size=768, optimization_options=your_options)

Args:
    input (str): input model path.
    model_type (str): model type - like bert, bert_tf, bert_keras or gpt2.
    num_heads (int): number of attention heads. Default is 0 to allow detect the parameter from graph automatically (for model_type "bert" only).
    hidden_size (int): hidden size. Default is 0 to allow detect the parameter from graph automatically (for model_type "bert" only).
    optimization_options (OptimizationOptions or None): optimization options that can use to turn on/off some fusions.
    opt_level (int): onnxruntime graph optimization level (0, 1, 2 or 99). When the level > 0, onnxruntime will be used to optimize model first.
    use_gpu (bool): use gpu or not for onnxruntime.
    only_onnxruntime (bool): only use onnxruntime to optimize model, and no offline fusion logic is used.

 Returns:
    object of an optimizer class.
Nr   )r   r   FTr?   z#Model producer not matched: Expect z,  Got  z0. Please specify correct --model_type parameter.zRemove tempoary model: {}zonnxruntime.transformersr   )__version__)rg   r<   r   producer_namer'   warningproducer_versionr
   optimizer3   remover7   r/   rF   r%   r   )inputrq   rC   rD   r{   r   r   r_   optimizer_classproducerrun_onnxruntimetemp_model_pathrF   rG   onnxruntime_versions                  r;   optimize_modelr     s   8 4A3L0_O1}1%T]^	 2%RST/%QUVEx+>+>>1(75CVCVBWWXY^YoYoXp  qa  b	
 #6zB+>I/0 
		/"077HI$>IOO!>':OO$r=   c                 f    U (       a  [         R                  " SSS9  g [         R                  " SS9  g )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(funcName)20s: %(message)s)r   )coloredlogsinstallr]   s    r;   _setup_loggerr   F  s%    '/ij =>r=   c                  &   [        5       n [        U R                  5        [        R                  R                  U R                  5      [        R                  R                  U R                  5      :X  a  [        R                  S5        [        U 5      n[        U R                  U R                  U R                  U R                  U R                  UU R                   U R"                  S9nU R$                  (       a  UR'                  5         U R(                  (       a  UR+                  5         UR-                  U R                  U R.                  5        UR1                  5       (       a  [        R3                  S5        g [        R3                  S5        g )NzYSpecified the same input and output path. Note that this may overwrite the original model)r   r{   r   r_   z#The model has been fully optimized.zThe model has been optimized.)ro   r   r^   r3   r4   realpathr   outputr'   r   r|   r   rq   rC   rD   r   r   r_   rR    convert_model_float32_to_float16rQ   change_input_to_int32save_model_to_filer`   is_fully_optimizedinfo)rn   r{   rG   s      r;   mainr   M  s   D$,,	ww

#rww'7'7'DDrt4T:tzz##~~#//)-4H'+||040E0EGI ||224'')  d.K.KL##%%9:34r=   __main__)FNr   )r   r   r   Nr   FF)'loggingr   onnxr3   sysra   numpynptypingr   collectionsr   r   r   r   r   onnx_model_bertr	   r
   onnx_model_bert_tfr   onnx_model_bert_kerasr   onnx_model_gpt2r   	getLogger__name__r'   rg   rd   boolrj   r<   rH   ro   r|   r   r   r    r=   r;   <module>r      s  (    	 
     B B B . 4 )			8	$ It,E2%|U;It,y%0 -28<-/- S - %)- 25-  (+-  58- `5 5S#X 5zz 6 % (, $)>B?5B zF r=   