
    iN                     D   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	  S SK
J
r
  S SKJr  S SKrS SKrS SKrS SKJrJr  \	 " S S5      5       r\	 " S S	5      5       r   SS
 jrS rS rS rS rS rS rS rS rS rS r S r!S r"\#S:X  a
  Sr$\"" 5         gg)    N)	dataclass)datetime)Path)generate_test_dataget_bert_inputsc                       \ rS rSr% \\S'   \\S'   \\S'   \\S'   \\S'   \\S'   \\S'   \\S	'   \\S
'   \\S'   \\S'   \\S'   \\S'   Srg)TestSetting!   
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_length N)	__name__
__module____qualname____firstlineno__int__annotations__boolstr__static_attributes__r       f/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/onnxruntime/transformers/bert_perf_test.pyr	   r	   !   sI    OOOMM
IM    r"   r	   c                   r    \ rS rSr% \\S'   \\S'   \\S'   \\S'   \\S'   \S-  \S'   \S-  \S	'   \\S
'   Srg)ModelSetting2   
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelNinput_tuning_resultsoutput_tuning_results	mask_typer   )r   r   r   r   r    r   r   r!   r   r"   r#   r%   r%   2   s7    ON*$:%Nr"   r%   c                 @   SS K nUR                  U5        U(       a  SUR                  5       ;  a  [        S5        U(       a1  US:X  a  SS/nO)US:X  a  SS/nOUS	:X  a  SS/nOUS
:X  a  / SQnOSS/nOS/nUR	                  5       n	XYl        UR                  R                  U	l        Uc  UR                  R                  U	l        OUS:X  a  UR                  R                  U	l        OUS:X  a  UR                  R                  U	l        OlUS:X  a  UR                  R                  U	l        OJUS:X  a  UR                  R                  U	l        O(US:X  a  UR                  R                  U	l        OXIl        Ub  X9l        UR#                  X	US9n
U(       a  US:X  a  SU
R%                  5       ;   d   eOUS:X  a  SU
R%                  5       ;   d   eO}US	:X  a  SU
R%                  5       ;   d   eO`US
:X  a-  SU
R%                  5       ;   d   eSU
R%                  5       ;   d   eO-SU
R%                  5       ;   d   eOSU
R%                  5       ;   d   eUb;  ['        U5       nU
R)                  [*        R,                  " U5      5        S S S 5        U
$ U
$ ! , (       d  f       U
$ = f)Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProvidermigraphxMIGraphXExecutionProvidercudatensorrt)TensorrtExecutionProviderr0   r3            c   )	providersr8   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDORT_ENABLE_LAYOUTr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r'   r   r   r   rI   r   tuning_results_pathr>   execution_providerssess_optionssessionfs               r#   create_sessionrY   >   s    ++L9+;3V3V3XX N	
 u#9;Q"R#+&# #:<R"S## $;<R"S56--/L&2#"-";";"J"JL'0;0R0R0a0a-	!Q	&0;0R0R0b0b-	!Q	&0;0R0R0c0c-	!Q	&0;0R0R0f0f-	!Q	&0;0R0R0d0d-	!R	'0;0R0R0a0a-0H-',@)**:Ob*cGu)W-B-B-DDDD#.'2G2G2IIII*g.C.C.EEEE#.'2G2G2IIII*g.C.C.EEEE*g.C.C.EEEE%)>)>)@@@@&%&!&&tyy|4 ' N7N '& Ns   &J
Jc                     [         R                  [        R                  [         R                  [        R                  [         R                  [        R                  [         R
                  [        R                  0nX   $ N)torchfloat32npfloat16int32int64longlong)
torch_typetype_maps     r#   
numpy_typere      sH    rzzrzzRXXR[[	H r"   c                 @   U R                  5        VVs0 s H+  u  p4U[        R                  " U5      R                  U5      _M-     nnnUR                  5        VVs0 s H+  u  p4U[        R                  " U5      R                  U5      _M-     nnnXV4$ s  snnf s  snnf r[   )itemsr\   
from_numpyto)inputsoutputsdevicenamearrayinput_tensorsoutput_tensorss          r#   create_input_output_tensorsrq      s    QWQ]Q]Q_`Q_+$T5++E255f==Q_M`RYR_R_RabRa;4dE,,U366v>>RaNb(( abs   2B2Bc           
         U R                  5       nUR                  5        HZ  u  pEUR                  UUR                  R                  S[        UR                  5      UR                  UR                  5       5        M\     UR                  5        HZ  u  pEUR                  UUR                  R                  S[        UR                  5      UR                  UR                  5       5        M\     U$ Nr   )

io_bindingrg   
bind_inputrl   typere   dtypeshapedata_ptrbind_output)sessro   rp   rt   rm   tensors         r#   create_io_bindingr}      s    "J%++-MMv||$LLOO	
 . ',,.MMv||$LLOO	
 / r"   c                    / n/ nUR                   (       a  SOSn[        U5       H  u  pxU R                  X(5      n	UR                  U	5        0 n
[	        [        U5      5       H  nX   XU   '   M     [        XU5      u  p[        XU5      nU R                  U5        [        R                  " 5       nU R                  U5        [        R                  " 5       U-
  nUR                  U5        M     XE4$ )Nr6   cpu)r   	enumeraterunappendrangelenrq   r}   run_with_iobindingtimeitdefault_timer)rW   
all_inputsoutput_namestest_settingresultslatency_listrl   _test_case_idrj   resultrk   iro   rp   rt   
start_timelatencys                    r#   %onnxruntime_inference_with_io_bindingr      s    GL#++VF!*:!6\2vs<()A'-yGO$ * )DFU[(\%&w~N
 	"":.))+
"":.&&(:5G$! "7$   r"   c                 b   [        U5      S:  a&  U R                  U[        R                  " U5      5        / n/ n[	        U5       He  u  pV[
        R                  " 5       nU R                  X&5      n[
        R                  " 5       U-
  n	UR                  U5        UR                  U	5        Mg     X44$ rs   )r   r   randomchoicer   r   r   r   )
rW   r   r   r   r   r   rj   r   r   r   s
             r#   onnxruntime_inferencer      s    
:L&--
";<GL!*:!6))+
\2&&(:5vG$ "7   r"   c                    UR                  5       nS[        R                  R                  U 5       S3nUSUR                   SUR
                   S3R                  SS5      -  nUSUR                   SUR                   S3-  nUS	UR                   S
UR                   S3-  nUSUR                   SUR                   S3-  nUSUR                   S3-  nUSUR                   3-  nU$ )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerI   r   replacer   r   r   r   r   r   r   r   )r'   rW   r   rV   options        r#   	to_stringr      s9   ..0Lbgg&&z2315F
),*O*O)PPfgs  hI  hI  gJ  JK  L  T  T%r F L3344ElFbFbEccdeeF
L334LAXAX@YYZ[[F
--..>|?Z?Z>[[\]]F
()M)M(NaPPF
'(K(K'LMMFMr"   c           
      X   [        U R                  UR                  UR                  UU R                  UR
                  U R                  S9nUR                  5        Vs/ s H  ofR                  PM     nn[        U R                  XQ5      nX;   a  [        SU5        g [        SU5        / n	UR                  (       a<  [        UR                  5       H"  n
[        XSXq5      u  pU	R                  U5        M$     O;[        UR                  5       H"  n
[!        XSU5      u  pU	R                  U5        M$     ["        R$                  " U	5      S-  n[&        R(                  " U5      n["        R*                  " US5      n["        R*                  " US5      n["        R*                  " US5      n["        R*                  " US5      n["        R*                  " US	5      nUR,                  S
U-  -  nUUUUUUU4X('   [        SR/                  [/        US5      [/        US5      5      5        U R0                  (       a  [2        R4                  R7                  U R0                  5      n[2        R4                  R9                  U5      (       aM  UnUR;                  SS5      S    S[<        R>                  " 5       RA                  5        S3n[        SUSUS5        URC                  5       n[E        US5       n[F        RH                  " UU5        S S S 5        [        SU5        g g s  snf ! , (       d  f       N!= f)N)r   rT   zskip duplicated test:zRunning test:  r&   K   Z   _   r<   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr9   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)%rY   r'   r   r   r+   r   r,   get_outputsrm   r   rA   r   r   r   r   extendr   r^   rn   
statisticsmean
percentiler   formatr-   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrP   rR   dump)model_settingr   perf_resultsr   r   rW   outputr   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsrX   s                            r#   run_one_testr      s     !..)>>G /6.A.A.CD.CFKK.CLD
M,,g
DC
%s+	/3""//0B$I\%!G ##L1	 1 //0B$9'|$\!G##L1 1
 *+d2J ooj1Oz2.Jz2.Jz2.Jz2.Jz2.J((F_,DEJ 	L 
6==f_V[>\^deoqv^wx **ggoom&I&IJ77>>+&&)O*11'1=a@A8<<>C[C[C]B^^cfK*o/FU_`((*+s#qIIc1 $*K8 +] El $#s   L(L
L)c                     [         R                  " [        U UUUU4S9nUR                  5         UR	                  5         g )N)targetargs)multiprocessingProcessr   startjoin)r   r   r   r   r   processs         r#   launch_testr   7  s=    %% 
	G MMOLLNr"   c                 n   UR                   b  [        U UUUUR                   5        g [        R                  " SS9n[        R                  " SS9n[	        XT15      n[        S[        SU5      5       H  nXv;  d  M
  UR                  U5        M     UR                  SS9  U H  n[        XX#U5        M     g )NF)logicalTr9      )reverse)	r   r   psutil	cpu_countlistr   minr   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   s	            r#   run_perf_testsr   F  s    ((4--	
 	  /I$$T2Mm781c"m,-%$$Q' . 4( 1MK_` !2r"   c                    [        U R                  U R                  U R                  U R                  5      u  p4n[        SUR                   SUR                   SUR                   35        [        UR                  UR                  UR                  UR                  UR                  UUUUR                  UR                  U R                  S9n[        XX&5        g )NzGenerating z samples for batch_size=z sequence_length=)r.   )r   r'   r(   r)   r*   rA   r   r   r   r   r   r   r   r   r.   r   )r   r   r   	input_idssegment_ids
input_maskr   s          r#   run_performancer   ^  s    )8  $$&&%%	*&IJ 

l--..F|G^G^F__pq}  rN  rN  qO  	P $$$,,++))J =Ir"   c            	      t   [         R                  " 5       n U R                  SS[        SS9  U R                  SSS[        SSS	9  U R                  S
SS[        SS9  U R                  SS[        SSS9  U R                  SSS[        SSS9  U R                  SS[        / SQSSS9  U R                  SS[        SSS9  U R                  SSSS S!9  U R                  SS"9  U R                  S#S[        S$/ S%QS&S'9  U R                  S(SSS)S!9  U R                  SS*9  U R                  S+SSS,S!9  U R                  SS-9  U R                  S.S[        S S/S9  U R                  S0S1S[        S S2S9  U R                  S3S[        S S4S9  U R                  S5S[        S S6S9  U R                  S7S[        S S8S9  U R                  S9S [        S:S;9  U R                  S<S [        S=S;9  U R                  S>S?S@[        SAS;9  U R                  SBSCSSSDS!9  U R                  SSE9  U R                  SFS[        S$SGS9  U R                  5       nU$ )HNz--modelTzbert onnx model path)requiredrv   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rv   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rv   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r9   r:   r;   r<   r<   zfonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 3 - layout, 99 - enable all.)r   rv   choicesr   r   z--seedr;   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr:   )r   r9   r:   r;      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rv   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rv   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr    r   set_defaults
parse_args)parserr   s     r#   parse_argumentsr   {  s=   $$&F
	DsAWX
Z   /   0   Y    u   _   (	   &
=   eLyY
&
*U<Vfg
u-
(    ,   '   )   ,    B	   !@	   #8   "B   u5
|   DKr"   c                     [        5       n U R                  S:X  a'  [        S[        SU R                  -  5      5      U l        U R
                  S::  a  U R                  U l        [        R                  " 5       nUR                  5       n[        U R                  5      n[        U5      S:  a  [        U5      S::  d  [        S5      e[        U R                  U R                   U R"                  U R$                  U R&                  U R(                  U R*                  U R,                  5      nU H  n[/        UU R                  U R                  U R                  U R0                  U R2                  U R4                  U R6                  U R8                  U R:                  U R<                  U R
                  U R>                  5      n[A        SU5        [C        XFU5        M     [E        URG                  5       SS S	9n[H        RJ                  RM                  [O        U R                  5      RP                  S
RS                  U R0                  (       a  SOSSRM                  [E        U5       Vs/ s H  n[U        U5      PM     sn5      U R                  [V        RX                  " 5       R[                  S5      5      5      n	[]        U	SSS9 n
[^        R`                  " U
SSS9nS nU H  u  pURc                  S5      nUcH  / SQnURe                  U Vs/ s H  oRc                  S5      S   PM     sn5        URg                  U5        U Vs/ s H  n[S        US5      PM     nnURe                  U Vs/ s H  oRc                  S5      S   PM     sn5        URg                  U5        M     S S S 5        [A        SU	5        g s  snf s  snf s  snf s  snf ! , (       d  f       N/= f)Nr   r9   r      z batch_size not in range [1, 128]ztest settingFc                     U S   $ )Nr9   r   )xs    r#   <lambda>main.<locals>.<lambda>M  s	    qQRtr"   )r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorr   )zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS)=r   zTest summary is saved to)4r   r   maxr   samplesr   r   r   Managerdictsetr   r   	Exceptionr%   modelr(   r)   r*   r+   r,   r-   r.   r	   r   r   r   r   r   r   r   r   rA   r   sortedrg   r   r   r   r   parentr   r    r   r   strftimerP   csvwritersplitr   writerow)r   managerr   batch_size_setr   r   r   sorted_resultsr   summary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluess                    r#   mainr!    s   D!aTDLL%8!9:##q('+';';$%%'G<<>L)N1$^)<)C:;; 

!!""	M %
"  LLOOLLMM%%IILL((''
  	nl+\B% %* L..0%^TN77<<TZZ(//\\EuHHf^&<=&<c!f&<=>  LLN##O4		
L 
lD"	-ZZDN
 .CYYs^F @AQ@A##G,0;<1fQ&F<MMF;Fq773<?F;<'# !/ 
., 

$l37 >(  A =;' 
.	-s=    N2=AON7!O=N<O$OO7O
O__main__)Nr:   N)%r   r  rR   r   r   r   r   r   dataclassesr   r   pathlibr   numpyr^   r   r\   bert_test_datar   r   r	   r%   rY   re   rq   r}   r   r   r   r   r   r   r   r   r!  r   __spec__r   r"   r#   <module>r(     s     
   	    !      > ! ! !      "Rj ).!2! B9Ja0J:_DQ4h zHF	 r"   