
    `if                        S r SSKrSSKrSSKrSSKJr  SSKrSSKrSSKrSSKrSSK	J
r
  SSKJrJrJrJrJrJrJrJrJrJr  SSKJr  SSKJrJrJrJr  \R:                  " S5      rSS	KJ r J!r!  \RD                  " S
S9r"S\RF                  ;  a  \$" \"5      \RF                  S'   SSK%r%SSK&J'r'J(r(J)r)J*r*J+r+  S r,S r-S\.S\.4S jr/S r0S r1S r2\3S:X  a  \2" 5         gg)a  Benchmarking the inference of pretrained transformer models.
PyTorch/TorchScript benchmark is based on https://github.com/huggingface/transformers/blob/master/examples/benchmarks.py.
One difference is that random input_ids is generated in this benchmark.

For onnxruntime, this script will convert a pretrained model to ONNX, and optimize it when -o parameter is used.

Example commands:
    Export all models to ONNX, optimize and validate them:
        python benchmark.py -b 0 -o -v -i 1 2 3
    Run OnnxRuntime on GPU for all models:
        python benchmark.py -g
    Run OnnxRuntime on GPU for all models with fp32 optimization:
        python benchmark.py -g -o
    Run OnnxRuntime on GPU with fp16 optimization:
        python benchmark.py -g -o -p "fp16"
    Run TorchScript on GPU for all models:
        python benchmark.py -e torchscript -g
    Run TorchScript on GPU for all models with fp16:
        python benchmark.py -e torchscript -g -p "fp16"
    Run ONNXRuntime and TorchScript on CPU for all models with quantization:
        python benchmark.py -e torchscript onnxruntime -p "int8" -o

It is recommended to use run_benchmark.sh to launch benchmark.
    N)datetime)Enum)
create_onnxruntime_session	Precisionsetup_loggerget_latency_resultoutput_detailsoutput_summaryoutput_fusion_statisticsinference_ortinference_ort_with_io_bindingallocateOutputBuffers)QuantizeHelper)create_onnxruntime_inputload_pretrained_modelexport_onnx_model_from_ptexport_onnx_model_from_tf )MODELSMODEL_CLASSESF)logicalOMP_NUM_THREADS)
AutoConfigAutoTokenizer	AutoModel	GPT2ModelLxmertConfigc                 4   SS K n/ nU (       a+  SUR                  5       ;  a  [        R                  S5        U$ U GHA  n[        U   S   nU GH*  nU[        U5      :  a    M(  US U nSU;   aW  [        R                  " 5          [        U[        U   S   [        U   S   [        U   S   UXUXXUUU5      u  nnnnS S S 5        SU;   a9  [        U[        U   S   [        U   S   [        U   S   UXUXXUUU5      u  nnnnW(       d  M  [        WU S	UUS
9nUc  M  UR                  5        Vs/ s H  nUR                  PM     nn/ n U (       a  SOSn![        R                  " UUS9n"[        R                   " [#        U5      [#        U5      [#        WU"R$                  5      /5      n#[        R                   " [#        U5      U"R$                  /5      n$U GH  n%U%S::  a  M  U GH  n&Wb  U&U:  a  M  SU;   a  [        R&                  O[        R(                  n'[+        UU%U&UU"U'5      n(SUR,                  U!U	UU(       + UUUU%U&[/        [0        R2                  " 5       5      S.n)[        R5                  SR7                  UU%U&/5      5        U(       a  [9        UU(U)UU%5      n*OUR;                  UU(5      n+U#/n,[=        [        U+5      5       H=  n-U-S:X  a#  [        U   S   S:X  a  U,R?                  U$5        M,  U,R?                  U#5        M?     SU;   a  [        R@                  O[        RB                  n.[E        UU(U)UUU+U U,U%U!U.5      n*[        R5                  U*5        UR?                  U*5        GM     GM     GM-     GMD     U$ ! , (       d  f       GN= fs  snf )Nr   CUDAExecutionProviderzvPlease install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.pt         tfT)enable_all_optimizationnum_threadsverbosecudacpu	cache_dironnxruntimeengineversiondevice	optimizer	precision
io_binding
model_nameinputsthreads
batch_sizesequence_lengthr   z)Run onnxruntime on {} with input shape {}gpt)#r,   get_available_providersloggererrorr   lentorchno_gradr   r   r   get_outputsnamer   from_pretrainednumpyprodmaxhidden_sizeint64int32r   __version__strr   nowinfoformatr   runrangeappendlonglongintcr   )/use_gpumodel_namesmodel_classr2   r&   batch_sizessequence_lengthsrepeat_timesinput_countsoptimize_onnxvalidate_onnxr+   onnx_dirr'   	overwritedisable_ort_io_bindinguse_raw_attention_maskmodel_fusion_statisticsmodel_sourcer,   resultsr4   all_input_names
num_inputsinput_namesonnx_model_fileis_valid_onnx_model
vocab_sizemax_sequence_lengthort_sessionnode_argort_output_namesoutput_buffersr0   configmax_last_state_sizemax_pooler_sizer7   r8   input_value_type
ort_inputsresult_templateresultort_outputsoutput_buffer_max_sizesi	data_types/                                                  g/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/onnxruntime_tools/transformers/benchmark.pyrun_onnxruntimerz   G   s    G+;3V3V3XX E	
 !
 ,Q/&JC00)+:6K|#]]_\u"F:$6q$96*;Ma;PRXYcRdefRgit!['m.	;R]TYO%8*FY %
 |#Xqz 21 5vj7I!7LfU_N`abNcepg-*I7NYPU!4jBU
 '4_5<MQAL=D	FK
 ">I>U>U>WX>W(>WXN&VEF//
iPF"'**[!3'7#8Z!3!346#7 $jj#k*:F<N<N)OPO)
?'7O*6?M`;` 6:l6Ju{{PUP[P[$!9*jRacn:@BR"TJ #0#.#:#:"(%2%.*@&@&0",#.&0+:$'$7'O KK K R RS]T^`oSp!r s .!.{JYegq!r '2oo6F
&S3F2G/!&s;'7!8A Av&*<Q*?5*H 7 > > O 7 > >?R S "9 7;l6JENNPUPZPZ	!>{JXgiu?OQ\^l?VXbdjlu"w KK'NN6*W (8 *K ' "n N] %_,  Ys   :N1N
Nc                 D  ^^ / nU (       a:  [         R                  R                  5       (       d  [        R	                  S5        U$ [         R
                  " S5        U GHq  n[        R                  " XU	S9n[        XXS9n[        R                  " XS9nXR                  ;   a  UR                  U   OSn[        R                  SU 35        [        R                  SUR                  5        35        U[        R                  :X  a  UR                  5         [         R                   " U (       a  S	OS
5      nUR#                  U5        U[        R$                  :X  a  [&        R(                  " U5      nU GHW  nUS::  a  M  U GHD  nUb  UU:  a  M  [        R+                  SR-                  UUU/5      5        [         R.                  " SUR0                  S-
  UU4[         R2                  US9m U(       a   [         R4                  R7                  UT5      OUmT" T5        [8        R:                  " UU4S jUSS9nU(       a  SOS[         R<                  U (       a  SOS
SUSUSUUU[?        [@        RB                  " 5       5      S.nURE                  [G        UU5      5        [        R+                  U5        URI                  U5        GMG     GMZ     GMt     U$ ! [J         a?  n[        RM                  U5        [         R                  RO                  5          S nAGM  S nAff = f)NzYPlease install PyTorch with Cuda, and use a machine with GPU for testing gpu performance.F)torchscriptr+   )rn   r+   custom_model_classr*      zModel zNumber of parameters zcuda:0r)   r   z%Run PyTorch on {} with input shape {}r!   )lowhighsizedtyper0   c                     > T " T5      $ N )	inference	input_idss   ry   <lambda>run_pytorch.<locals>.<lambda>   s
    Yy5I    repeatnumberr|   r>   r(   r   r-   )(r>   r(   is_availabler;   r<   set_grad_enabledr   rB   r   r   max_model_input_sizesdebugnum_parametersr   FLOAT16halfr0   toINT8r   quantize_torch_modelrL   rM   randintrh   longjittracetimeitr   rI   rJ   r   rK   updater   rP   RuntimeError	exceptionempty_cache)rS   rT   rU   r2   r&   rV   rW   rX   r|   r+   r'   rb   r4   rn   model	tokenizermax_input_sizer0   r7   r8   runtimesrt   er   r   s                          @@ry   run_pytorchr      s   Guzz..00pq	5!!
++J[de%j9u!11*R	 &)H)HH #88NR 	 	veW%&,U-A-A-C,DEF	)))JJL'hu=	&"77>E%JQ#3!-/N2RCJJ:XbdsWtuv!MMa/5/@/@1/D0:O/L05

17	9	
-EP		y AV[Ii(%}}-IR^ghiH 4?-G#(#4#4,3&%'%.&(&0"##.&0+:$'$7F MM"4Xz"JKKK'NN6*A $4	 &) "z N	 $ -$$Q'JJ**,,-s   -CK
L	 3L	L	do_eager_modeuse_xlac                 4   ^ ^^^ SS K mSSKJm  U UUU4S jnU$ )Nr   )wrapsc                    >^  T" T 5      U 4S j5       nT" T 5      TR                  TS9U 4S j5       5       nTSL a  TSL d   S5       eU$ U$ )Nc                     > T" U 0 UD6$ r   r   argskwargsfuncs     ry   run_in_eager_modeFrun_with_tf_optimizations.<locals>.run_func.<locals>.run_in_eager_mode   s    (((r   )experimental_compilec                     > T" U 0 UD6$ r   r   r   s     ry   run_in_graph_modeFrun_with_tf_optimizations.<locals>.run_func.<locals>.run_in_graph_mode   s     (((r   TFzcCannot run model in XLA, if `args.eager_mode` is set to `True`. Please set `args.eager_mode=False`.)function)r   r   r   r   r$   r   r   s   `  ry   run_func+run_with_tf_optimizations.<locals>.run_func   st    	t	) 
	) 
t	'	2	) 
3 
	) D 5 utu $$$$r   )
tensorflow	functoolsr   )r   r   r   r$   r   s   `` @@ry   run_with_tf_optimizationsr      s    % %$ Or   c
                   ^^^^ ^! / n
SS K m!T!R                  R                  R                  U5        U (       d  T!R                  R	                  / S5        U (       a6  T!R
                  R                  5       (       d  [        R                  S5        U
$ U (       a}  T!R                  R                  S5      n T!R                  R	                  US   S5        T!R                  R                  R                  US   S5        T!R                  R                  SS9  U[         R"                  :X  d  U[         R$                  :X  a  ['        S5      eU GH!  n[(        R*                  " XS9m[-        UTUUSS	9m [.        R*                  " XS9nXR0                  ;   a  UR0                  U   OS
nU GH  nUS::  a  M  U GH  nUb  UU:  a  M  [        R3                  SR5                  UUU/5      5        SS KnUR9                  5       n[;        UU-  5       Vs/ s H"  nUR=                  STR>                  S-
  5      PM$     nnT!RA                  UUU4T!RB                  S9m [E        SSS9UU 4S j5       n[E        SSS9UU 4S j5       n[E        SSS9UUU U!4S j5       nUmTRF                  (       a  UmO[I        T[J        5      (       a  UmT" 5         [L        RN                  " U4S jUSS9nST!RP                  U (       a  SOSSUSUSUUU[S        [T        RV                  " 5       5      S.nURY                  [[        UU5      5        [        R3                  U5        U
R]                  U5        GM     GM     GM$     U
$ ! [         a   n[        R                  U5         S nAGNS nAff = fs  snf ! [         aG  n[        R                  U5        SSK/J0n  URc                  5       nURe                  5          S nAGM=  S nAff = f)Nr   GPUzVPlease install Tensorflow-gpu, and use a machine with GPU for testing gpu performance.Tz/gpu:0)r0   z+Mixed precision is currently not supported.r*   )rn   r+   r}   is_tf_modelr~   z(Run Tensorflow on {} with input shape {}r!   )shaper   F)r   r   c                     > T" T SS9$ )NF)trainingr   r   r   s   ry   encoder_forward'run_tensorflow.<locals>.encoder_forwardI  s    $Y??r   c                     > T" T T SS9$ )NF)decoder_input_idsr   r   r   s   ry   encoder_decoder_forward/run_tensorflow.<locals>.encoder_decoder_forwardM  s    $Y)V[\\r   c                     > TR                   R                  SSTR                  /5      n TR                   R                  SSTR                  /5      nT" TXSS9$ )Nr!   F)visual_feats
visual_posr   )randomnormalvisual_feat_dimvisual_pos_dim)featsposrn   r   r   r$   s     ry   lxmert_forward&run_tensorflow.<locals>.lxmert_forwardQ  sU     "		 0 0!Q8N8N1O P ii..1f6K6K/LM$YU]bccr   c                     > T " 5       $ r   r   )r   s   ry   r    run_tensorflow.<locals>.<lambda>_  s    Y[r   r   r   r(   r)   r   r-   )r(   )3r   rn   	threading set_intra_op_parallelism_threadsset_visible_devicestestis_built_with_cudar;   r<   list_physical_devicesexperimentalset_memory_growth
distributeOneDeviceStrategyr   r   r   r   r   NotImplementedErrorr   rB   r   r   r   rL   rM   r   RandomrO   r   rh   constantrH   r   is_encoder_decoder
isinstancer   r   r   rI   rJ   r   rK   r   r   rP   numbar(   get_current_devicereset)"rS   rT   rU   r2   r&   rV   rW   rX   r+   r'   rb   physical_devicesr   r4   r   r   r7   r8   r   rngrw   valuesr   r   r   r   rt   r(   r0   rn   r   r   r   r$   s"                                @@@@@ry   run_tensorflowr     s   GII88E
		%%b%0rww1133mn99::5A	 II))*:1*=uEII""445Ea5H$OMM++8+< I%%%inn)D!"OPP!
++JL%j-3099D26	8 "11*R	 &)H)HH #88NR 	 &JQ#3!-/N2RFMMjOY[jNkm n mmoINz\kOkIlmIlA#++a):):Q)>?IlmKKz?6S[][c[cKd	/#.UER@ S@ /UER] S] /UERd Sd
 !0I00$;	#FL99$2	K%}}-@^_`H #/#%>>,3&%'%.&(&0"##.&0+:$'$7F MM"4Xz"JKKK'NN6*m $4	 & "^ Nk  	 Q	 B n\ $ #$$Q'*!446FLLNN	#s8   4A!M2 )N
DN$2
N<NN$
O5	.;O0	0O5	c                     [         R                  " 5       n U R                  SSSS[        / SQ[	        [
        R                  " 5       5      SSR                  [
        R                  " 5       5      -   S9  U R                  S	SS
[        SSS/SS9  U R                  SS[        S [	        [        5      SSR                  [        5      -   S9  U R                  SSSS[        S// SQSS9  U R                  SSS[        [        R                  R                  SS5      SS9  U R                  SS[        [        R                  R                  SS5      SS9  U R                  SS SS!S"S#9  U R                  S$S%[        [        R                  [	        [        5      S&S'9  U R                  S(SS!S)S#9  U R                  S*SS!S+S#9  U R                  S,S-SS!S.S#9  U R                  S/S0SS!S1S#9  U R                  S2S3SS S4S59  U R                  S6S7SS S8S59  U R                  S9S:SS S;S59  U R                  S<S=SSS
/[        / S>QS?S@9  U R                  SASBSSC[        SDSE9  U R                  SFSGS[        S
/SH9  U R                  SISJS[        / SKQSH9  U R                  SLSS!SMS#9  U R                  SSN9  U R                  SOSPSS[        SQ/SRSS9  U R                  5       nU$ )TNz-mz--modelsF+)zbert-base-casedzroberta-basegpt2z Pre-trained models in the list: z, )requirednargstypedefaultchoiceshelpz--model_sourcer!   r    r$   zExport onnx from pt or tfz--model_classz!Model type selected in the list: )r   r   r   r   r   z-ez	--enginesr,   )r,   r>   r|   r   zEngines to benchmarkz-cz--cache_dir.cache_modelsz%Directory to cache pre-trained models)r   r   r   r   z
--onnx_dironnx_modelszDirectory to store onnx modelsz-gz	--use_gpu
store_truezRun on cuda device)r   actionr   z-pz--precisionzfPrecision of model to run. fp32 for full precision, fp16 for half precision, and int8 for quantization)r   r   r   r   z	--verbosezPrint more informationz--overwritezOverwrite existing modelsz-oz--optimize_onnxz'Use optimizer.py to optimize onnx modelz-vz--validate_onnxzValidate ONNX modelz-fz--fusion_csvz:CSV file for saving summary results of graph optimization.)r   r   r   z-dz--detail_csvz#CSV file for saving detail results.z-rz--result_csvz$CSV file for saving summary results.z-iz--input_counts)r!   r"   r#   zXNumber of ONNX model inputs. Please use 1 for fair comparison with Torch or TorchScript.)r   r   r   r   r   r   z-tz--test_timesd   z8Number of repeat times to get average inference latency.)r   r   r   r   z-bz--batch_sizes)r   r   r   z-sz--sequence_lengths)             @         z--disable_ort_io_bindingz=Disable running ONNX Runtime with binded inputs and outputs. )r^   z-nz--num_threadsr   zThreads to use)r   r   r   r   r   )argparseArgumentParseradd_argumentrJ   listr   keysjoinr   ospathr   FLOAT32intset_defaults
parse_args)parserr   s     ry   parse_argumentsr  {  sa   $$&F
"!&!  K $V[[] 3?$))FKKMBZZ  \ (!&  $!%t8  : !&  $ $] 3@499]C[[  ] #!&! !. U3  5 %!&  "S. AD  F !&  "S- @=	  ? kE,Uij
!!Yu  w eLOgh
lQlm
)!&+F	  H /%[pq
&!& $Y	  [ nudQvw
nudQwx
(!&!!"  )w  y &!& # W  Y oSsQCP
2#CQmn
2!&+\  ^ u5
oSs]^\_fvwDKr   c                  
   [        5       n [        U R                  5        U R                  [        R
                  :X  a'  U R                  (       d  [        R                  S5        g U R                  [        R                  :X  a'  U R                  (       a  [        R                  S5        g [        [        S U R                   5       5      5      U l        [        R                  SU  35        [        R                  R!                  U R"                  5      (       d!   [        R$                  " U R"                  5        SU R(                  ;   nSU R(                  ;   nSU R(                  ;   nS	U R(                  ;   n/ nU R                   GH  n[*        R,                  " U5        [        R/                  [*        R0                  R3                  5       5        U(       d  U(       Ga  U R4                  S
/:w  a  [        R7                  S5        U(       ar  U[9        U R                  U R:                  U R<                  U R                  UU R>                  U R@                  U RB                  SU R"                  U R                  5      -  nU(       ar  U[9        U R                  U R:                  U R<                  U R                  UU R>                  U R@                  U RB                  SU R"                  U R                  5      -  nU(       aq  U[E        U R                  U R:                  U R<                  U R                  UU R>                  U R@                  U RB                  U R"                  U R                  5
      -  n0 nU(       d  GM   SnU[G        U R                  U R:                  U R<                  U R                  UU R>                  U R@                  U RB                  U R4                  U RH                  U RJ                  U R"                  U RL                  U R                  U RN                  U RP                  XU RR                  5      -  nGM     [T        RV                  " 5       RY                  S5      n	W(       a$  U RZ                  =(       d    SU	 S3n
[]        Xz5        [_        U5      S:X  a'  U R>                  S/:w  a  [        R7                  S5        g U R`                  =(       d    SU	 S3n
[c        XZ5        U Rd                  =(       d    SU	 S3n
[g        XZU 5        g ! [&         a&    [        R                  SU R"                  -  5         GNf = f!   [        R                  SSS9   GM  = f)Nzfp16 is for GPU onlyzint8 is for CPU onlyc              3   <   #    U  H  oS ::  a  [         OUv   M     g7f)r   N)	cpu_count).0xs     ry   	<genexpr>main.<locals>.<genexpr>  s     !WFVAv)1"<FVs   zArguments: z#Creation of the directory %s failedr>   r|   r,   r   r!   zB--input_counts is not implemented for torch or torchscript engine.TF	Exception)exc_infoz%Y%m%d-%H%M%Sbenchmark_fusion_z.csvr   zNo any result avaiable.benchmark_detail_benchmark_summary_)4r  r   r'   r2   r   r   rS   r;   r<   r   sortedsetr&   rL   r	  r
  existsr+   mkdirOSErrorenginesr>   set_num_threadsr   
__config__parallel_inforY   warningr   modelsrU   rV   rW   
test_timesr   rz   rZ   r[   r\   r]   r^   ra   r   rK   strftime
fusion_csvr   r=   
detail_csvr	   
result_csvr
   )r   enable_torchenable_torchscriptenable_onnxruntimeenable_tensorflowrb   r&   r`   r_   
time_stampcsv_filenames              ry   mainr3    s    D~~***4<<+,~~'DLL+,c!WdFVFV!WWXD
KK+dV$%77>>$..))	QHHT^^$ dll*L&$,,6&$,,6$4G''k*U%%3356-  QC'cd!;t||T[[$BRBRTXTbTbdo'+'7'79N9NPTP_P_aegkgugu'+||5 5 ;t||T[[$BRBRTXTbTbdo'+'7'79N9NPTP_P_afhlhvhv'+||5 5 ~dllDKKAQAQSWSaSacn&*&6&68M8Mt`d`n`n&*ll4 4G #%:)-&?4<<dFVFVX\XfXfhs+/+;+;T=R=RTXTcTceievev+/+=+=t?Q?QSWSaSacgcpcp+/<<IdId+A\`\m\m	o o5 (D ((9JN,=j\*N !8G
7|qs"NN45??J(9*T&JL7)??K(::,d&KL7$/u  	QLL>OP	QR:yD9s    T <CU,U UU__main__)4__doc__r  loggingr   r   rC   r	  psutilonnxenumr   benchmark_helperr   r   r   r   r	   r
   r   r   r   r   quantize_helperr   onnx_exporterr   r   r   r   	getLoggerr;   huggingface_modelsr   r   r  environrJ   r>   transformersr   r   r   r   r   rz   r   boolr   r   r  r3  __name__r   r   ry   <module>rC     s    2      	   5 5 5 +  			2	 4U+	 BJJ&$'	NBJJ !  X XcLFRT D 2iXnbN0b zF r   