
    `i{u                        S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKJ	r	  S SK
JrJrJrJr  S SKJrJrJrJr  S SKJr  \R,                  " \5      r/ SQr\R4                  S\R6                  S\R8                  S	0r " S
 S\5      r " S S\5      r " S S\5      r  " S S\5      r! " S S\5      r"\!SS4\"SS4\ SS4S.r# " S S5      r$ " S S5      r%g)    N)Path)ListDictTupleUnion)	GPT2ModelGPT2LMHeadModel
GPT2ConfigTFGPT2Model)	Precision)
distilgpt2gpt2zgpt2-mediumz
gpt2-largezgpt2-xlMb@?g?g      @c                   8   ^  \ rS rSrSrU 4S jrU 4S jrSrU =r$ )GPT2ModelNoPastState   7Here we wrap a class to disable past state output.
    c                 $   > [         TU ]  U5        g Nsuper__init__selfconfig	__class__s     i/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/onnxruntime_tools/transformers/gpt2_helper.pyr   GPT2ModelNoPastState.__init__            c                 "   > [         TU ]  USSS9$ )NF)	use_cachereturn_dictr   forwardr   	input_idsr   s     r   r%   GPT2ModelNoPastState.forward!   s    wyEuMMr     	__name__
__module____qualname____firstlineno____doc__r   r%   __static_attributes____classcell__r   s   @r   r   r      s    !N Nr    r   c                   8   ^  \ rS rSrSrU 4S jrU 4S jrSrU =r$ )TFGPT2ModelNoPastState$   r   c                 2   > SUl         [        TU ]	  U5        g )NF)r"   r   r   r   s     r   r   TFGPT2ModelNoPastState.__init__'   s      r    c                     > [         TU ]  USS9$ )NF)r"   )r   callr&   s     r   r%   TFGPT2ModelNoPastState.forward+   s    w|I|77r    r)   r*   r2   s   @r   r4   r4   $   s    !8 8r    r4   c                   H   ^  \ rS rSrSrU 4S jr\S 5       rU 4S jrSr	U =r
$ )MyGPT2Model.   zRHere we wrap a class for Onnx model conversion for GPT2Model with past state.
    c                 $   > [         TU ]  U5        g r   r   r   s     r   r   MyGPT2Model.__init__1   r   r    c           	         [        U S   S   [        5      (       d  [        U S   S   [        5      (       a  [        U S   5      U:X  a  [        U S   S   5      S:X  d   e/ n[	        U5       HY  nUR                  [        R                  " U S   U   S   R                  S5      U S   U   S   R                  S5      4SS95        M[     U S   [        U5      4$ U $ )N   r      )dim)	
isinstancetuplelistlenrangeappendtorchcat	unsqueeze)result	num_layerpresentis       r   post_processMyGPT2Model.post_process4   s    fQilE**j1t.L.Lvay>Y.3vay|3D3IIIG9% uyy&)A,q/*C*CA*Fq	RSUVHaHabcHd)eklmn & 1IuW~..r    c                 x   > [         TU ]  UUUUSS9n[        R                  XPR                  R
                  5      $ NF)position_idsattention_maskpast_key_valuesr#   r   r%   r<   rQ   r   n_layerr   r'   rU   rV   pastrM   r   s         r   r%   MyGPT2Model.forwardA   sA    .:0>15-2	 ! 4
 ''0C0CDDr    r)   )r+   r,   r-   r.   r/   r   staticmethodrQ   r%   r0   r1   r2   s   @r   r<   r<   .   s-    ! 
 
E Er    r<   c                   8   ^  \ rS rSrSrU 4S jrU 4S jrSrU =r$ )MyGPT2LMHeadModelJ   zXHere we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state.
    c                 $   > [         TU ]  U5        g r   r   r   s     r   r   MyGPT2LMHeadModel.__init__M   r   r    c                 x   > [         TU ]  UUUUSS9n[        R                  XPR                  R
                  5      $ rT   rX   rZ   s         r   r%   MyGPT2LMHeadModel.forwardP   sA    .:0>15-2	 ! 4 ''0C0CDDr    r)   r*   r2   s   @r   r_   r_   J   s    !E Er    r_   c                   8   ^  \ rS rSrSrU 4S jrU 4S jrSrU =r$ )MyGPT2LMHeadModel_NoPaddingZ   zHere we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state and no padding.
When you always use batch_size=1 in inference, there is no padding in inputs. In such case, position_ids
and attention_mask need no be in inputs.
c                 $   > [         TU ]  U5        g r   r   r   s     r   r   $MyGPT2LMHeadModel_NoPadding.__init___   r   r    c                    > [         TU ]  XS9$ )N)rW   r$   )r   r'   r[   r   s      r   r%   #MyGPT2LMHeadModel_NoPadding.forwardb   s    wy??r    r)   r*   r2   s   @r   rf   rf   Z   s    !@ @r    rf   logitsTF
last_state)r	   GPT2LMHeadModel_NoPaddingr   c                   <    \ rS rSrS rS\4S jrS\4S jrS r	Sr
g)	
Gpt2Inputsn   c                 4    Xl         X l        X0l        X@l        g r   )r'   rU   rV   r[   )r   r'   rU   rV   r[   s        r   r   Gpt2Inputs.__init__o   s    +4.:JXLP	r    returnc                     U R                   U R                  U R                  4 Vs/ s H	  oc  M  UPM     nnU R                  (       a  UR	                  U R                  5        U$ s  snf r   )r'   rU   rV   r[   extend)r   v
input_lists      r   to_listGpt2Inputs.to_listu   sT    "&..$2C2CTEXEX!Yk!YAa!Y
k99dii(	 ls
   A&A&c                 ~    [        S U R                  U R                  U R                  U R                  4 5       5      $ )Nc              3   .   #    U  H  oc  M  Uv   M     g 7fr   r)   ).0rw   s     r   	<genexpr>&Gpt2Inputs.to_tuple.<locals>.<genexpr>}   s     u c1QQ cs   	)rE   r'   rU   rV   r[   )r   s    r   to_tupleGpt2Inputs.to_tuple|   s3    u1B1BDDWDWY]YbYb cuuur    c                 &   U R                   b'  U R                   R                  [        R                  S9OS nU R                   Vs/ s H  o"R                  [        R                  S9PM!     nn[        U R                  U R                  X5      $ s  snf )N)dtype)rV   torJ   float32r[   rp   r'   rU   )r   rV   pr[   s       r   to_fp32Gpt2Inputs.to_fp32   sp    HLH[H[Hg,,//emm/Dmq3799=9a5==)9=$..$*;*;^RR >s   &B)rV   r'   r[   rU   N)r+   r,   r-   r.   r   r   ry   r   r   r   r0   r)   r    r   rp   rp   n   s(    Q v% vSr    rp   c                   t   \ rS rSrSr\   S,S\S\S\S\S\S\S	\S
\R                  S\	S\	S\	S\
4S jj5       r\ S-S\S\S\S\S\S\\\\   4   4S jj5       r\S 5       r\S.S j5       r\S.S j5       r\S/S j5       r\    S0S\S\	S\	S\	S\	4
S jj5       r\ S.S j5       r\S1S\
S\4S jj5       r\S1S\
S\4S jj5       r\S  5       r\S2S! j5       r\   S3S\
S"\\\R6                  4   S#\\\\   4   S\S$\	S%\	4S& jj5       r\        S4S' j5       r\S5S( j5       r\   S6S\4S) jj5       rS*r g+)7
Gpt2Helper   zJA helper class for Gpt2 model conversion, inference and verification.
    
batch_sizepast_sequence_lengthsequence_lengthnum_attention_headshidden_sizerN   
vocab_sizedevicefloat16has_position_idshas_attention_maskrt   c           	      x   U(       a  [         R                  O[         R                  nSXU[        XC-  5      /n[	        U5       Vs/ s H  n[         R
                  " XUS9PM     nn[         R                  " SUS-
  X4[         R                  US9nSnU
(       aD  X-   n[         R                  " U U/XS9nUS:  a#  [        R                  " SUS-
  5      nSUSS2U4'   SnU	(       aB  UR                  5       R                  S5      S-
  nUR                  US:  S5        USS2US24   n[        UUUU5      $ s  snf )zCreate random inputs for GPT2 model.
Returns torch tensors of input_ids, position_ids, attention_mask and a list of past state tensors.
rB   r   r   r   rA   )lowhighsizer   r   N)rJ   r   r   intrH   randrandintint64onesrandomlongcumsummasked_fill_rp   )r   r   r   r   r   rN   r   r   r   r   r   
float_type
past_shape_r[   r'   rV   total_sequence_lengthpadding_positionrU   s                       r   get_dummy_inputsGpt2Helper.get_dummy_inputs   s<    '.U]]5==
:NPST_TuPvw
QVW`QabQaA

:GQabMMa'1A~(2'D(-)/	1	 $8$J!"ZZ5J(KS]mN$)#)>>!5JQ5N#O 67q"223 *//188<q@L%%lQ&6:'+?+@(@AL)\>4HH- cs   D7r   model_classc                    UR                   nUR                  nUR                  nUR                  n[        U   S   n	XU	S:X  a  UOU/n
SXX-   [        Xe-  5      /nX0n[        U5       H  nXS[        U5      -   '   M     U$ )zJReturns a dictionary with output name as key, and shape as value.
        rA   rl   rB   present_)r   r   num_hidden_layersr   MODEL_CLASSESr   rH   str)r   r   r   r   r   r   r   rN   r   output_namelast_state_shapepresent_state_shapeoutput_shapesrP   s                 r   get_output_shapesGpt2Helper.get_output_shapes   s     %88((,,	&&
#K03&{V^G^dopz0D0V12

 %7y!A1D*s1v-. " r    c                    U H|  nX ;   d   eX   n[         R                  " X   5      UR                  5       :  d  M:  [        R                  " [         R                  " X   5      UR
                  UR                  S9X'   M~     g )Nr   )numpyprodnelementrJ   emptyr   r   )output_buffersr   keybuffers       r   auto_increase_buffer_size$Gpt2Helper.auto_increase_buffer_size   si     C(((#(Fzz-,-0AA&+kk%**]=O2P8>9?'H#	 !r    c                     U(       a  [         R                  O[         R                  n0 nU R                  5        H0  u  pV[         R                  " [
        R                  " U5      X1S9XE'   M2     U$ )zyReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.
        r   )rJ   r   r   itemsr   r   r   )r   r   
is_float16	data_typer   nameshapes          r   get_output_buffersGpt2Helper.get_output_buffers   sQ     &0EMMU]]	(..0KD#(;;uzz%/@	#aN  1r    c                    U S   R                  5       R                  5       n[        R                  " X1S   -
  5      nU(       a0  [        R                  " U[        R                  " U5      S-   -  5      $ [        R                  " U5      $ )zPReturns the maximum difference between PyTorch and OnnxRuntime outputs.
        r   gư>)cpur   absamax)torch_outputsort_outputsrelativeexpected_outputsdiffs        r   diff_outputsGpt2Helper.diff_outputs   sl     )+//1779yy)N:;::deii0@&AD&HIJJ::d##r    c           	      $   [         R                  " US   U S   R                  5       R                  5       X#S9n[        R	                  SU 35        Un[        U5      S-
  n[        U5       Hk  n[         R                  " USU-      U S   U   R                  5       R                  5       UUS9n[        R	                  SU SU SU 35        U=(       a    UnMm     U(       d.  [        R                  X5      n[        R                  SUS	 35        U$ )
zcReturns True if torch and ORT outputs are close for given thresholds, and False otherwise.
        r   rtolatolz9PyTorch and OnnxRuntime output 0 (last_state) are close: rA   zPyTorch and OnnxRuntime layer z state (present_z) are close:z@PyTorch and OnnxRuntime results are not all close: max_abs_diff=z.5f)
r   allcloser   loggerdebugrG   rH   r   r   info)	r   r   r   r   is_closeis_all_close
num_layerslayermax_abs_diffs	            r   compare_outputsGpt2Helper.compare_outputs   s    >>+a.-2B2F2F2H2N2N2PW[gPQYPZ[\%)
:&E~~k!e)&<&3A&6u&=&A&A&C&I&I&K+/+/1H LL9%@PQVPWWcdlcmno'4HL ' %22=NLKKZ[ghkZlmnr    onnx_model_pathverboseuse_external_data_formatc                 \   U R                   nUR                  n[        R                  SSSUR                  UR
                  UUR                  USUUS9n	U	R                  5       n
[        R                  " 5          U " U
6 nSSS5        [        U5       Vs/ s H  nSU 3PM
     nn[        U5       Vs/ s H  nSU 3PM
     nnWS   R                  S   UR                  :X  d"  US   R                  S   UR
                  :X  d   eUS   R                  S   UR                  :X  a  S	OS
/U-   nSSSS.US   SSS.0nU H  nSSS.UU'   M     U H  nSSS.UU'   M     S/nU(       a  SSS.US'   UR                  S5        U(       a  SSS.US'   UR                  S5        UR                  U5        [        U5      S:X  a  [        US   5      U:X  d   e[        R!                  SU	R"                  R                   SU	R$                  S   R                   SUS   R                   SUS   S   R                   35        ['        U5      R(                  R+                  SSS9  [        R,                  R/                  U [1        U
5      UUUUUSSUUS9  g! , (       d  f       GN= fs  snf s  snf )z:Export GPT-2 model with past state to ONNX model.
        rA   Fr   r   r   r   r   rN   r   r   r   r   r   Npast_r   r   rB   rl   rm   r'   r   seq_len)r   rA   past_seq_len)rA      total_seq_lenrU   rV   zShapes: input_ids=z past=z output=z	 present=T)parentsexist_ok   )
argsfinput_namesoutput_namesexample_outputsdynamic_axesopset_versiondo_constant_foldingr   r   )r   rY   r   r   r   r   r   ry   rJ   no_gradrH   r   rI   rv   rG   r   r   r'   r[   r   parentmkdironnxexportrE   )modelr   r   r   r   r   r   r   rN   dummy_inputsrx   outputsrP   
past_namespresent_namesr   r   r   r   s                      r   export_onnxGpt2Helper.export_onnx  s    #\\NN	!22aHICDGMGaGa?E?Q?Q=F>D>O>O:@;@DTFX 3 
Z "))+
]]_Z(G  ,1+;<+;aaSk+;
<16y1AB1AA8A31AB qz"f&7&7771:;K;KA;NRXRdRd;ddd$+AJ$4$4Q$76;L;L$LR^_boo $)%DlSTo[gluWvwD%1n!EL !D%1o!FL " #m/;	+JL(~.1=/-RL)*/0:&7|q S_	%AAA !7!7!=!= >f\EVEVWXEYE_E_D``hipqrisiyiyhz  {D  EL  MN  EO  PQ  ER  EX  EX  DY  Z	
 	_$$**4$*G

%$Z0+&1'3*1'3(*.23K") 	 
	+S _ =Bs   9JJ$2J)
J!c           
      v    SSK Jn  U" U SUUSSSS9nU(       a  UR                  SS9  UR                  X5        g)zQOptimize ONNX model with an option to convert it to use mixed precision.
        r   )optimize_modelr   NF)
model_type	num_headsr   	opt_leveloptimization_optionsuse_gpu)cast_input_output)	optimizerr   convert_model_float32_to_float16save_model_to_file)r   optimized_model_pathr   r   r   r   r  ms           r   optimize_onnxGpt2Helper.optimize_onnxS  sJ     	-?&,%8'2%&04#(* ...G	1Lr    inputs
total_runsc                    [         R                  S5        UR                  5       R                  5       n[        R
                  " 5          U " U6 nSSS5        US:X  a  W$ / n[        R
                  " 5          [        U5       HD  n[        R                  " 5       nU " U6 nUR                  [        R                  " 5       U-
  5        MF     SSS5        [        U5      S-  [        U5      -  n[         R                  SR                  [        US5      5      5        WU4$ ! , (       d  f       N= f! , (       d  f       Nk= f)zoRun inference of PyTorch model, and returns average latency in ms when total_runs > 0 besides outputs.
        zstart pytorch_inferenceNr     zPyTorch inference time = {} ms.2f)r   r   r   ry   rJ   r   rH   timerI   sumrG   format)	r   r  r  rx   r   latencyr   startaverage_latencys	            r   pytorch_inferenceGpt2Helper.pytorch_inferencei  s     	./ ^^%--/
]]_Z(G  ?N]]_:&		,tyy{U23 '  g,-G<5<<VOUZ=[\]''! _ _s   	D6AD/
D,/
D=c                 ,   [         R                  S5        S[        R                  " UR                  R                  5       R                  5       5      0nUR                  bV  [        UR                  5       H=  u  pE[        R                  " UR                  5       R                  5       5      USU 3'   M?     UR                  b?  [        R                  " UR                  R                  5       R                  5       5      US'   UR                  b?  [        R                  " UR                  R                  5       R                  5       5      US'   U R                  SU5      nUS:X  a  U$ / n[        U5       HQ  n[        R                  " 5       n	U R                  SU5      nUR                  [        R                  " 5       U	-
  5        MS     [        U5      S-  [        U5      -  n
[         R                  S	R!                  [!        U
S
5      5      5        Xj4$ )zlRun inference of ONNX model, and returns average latency in ms when total_runs > 0 besides outputs.
        zstart onnxruntime_inferencer'   Nr   rV   rU   r   r  z"OnnxRuntime Inference time = {} msr  )r   r   r   ascontiguousarrayr'   r   r[   	enumeraterV   rU   runrH   r  rI   r  rG   r  )ort_sessionr  r  
ort_inputsrP   past_ir   r  r   r  r  s              r   onnxruntime_inference Gpt2Helper.onnxruntime_inference  s    	24!5#:#:6;K;K;O;O;Q;W;W;Y#Z[
;;"&v{{3	*/*A*A&**,BTBTBV*W
U1#;' 4   ,+0+B+B6CXCXC\C\C^CdCdCf+gJ'(*).)@)@ATATAXAXAZA`A`Ab)cJ~&!oodJ7?z"AIIKE%//$
;KNN499;./ #
 g,-G<9@@Y^A_`a++r    c                    U R                  5       nUR                  5       (       d   eUR                  SUR                  R                  S[
        R                  [        UR                  5       5      UR                  5       5        XPR                  5       S   R                     R                  nU[        R                  :X  a  [
        R                  O[
        R                  n	Ub  [!        U5       H  u  pUR                  5       (       d   eUR                  5       nUS:X  a  UR                  5       nUR                  SU
 3UR                  R                  SU	[        UR                  5       5      U5        M     Ubf  UR                  5       (       d   eUR                  SUR                  R                  SU	[        UR                  5       5      UR                  5       5        Ubt  UR                  5       (       d   eUR                  SUR                  R                  S[
        R                  [        UR                  5       5      UR                  5       5        U R                  5        H  nUR                  nX^   n["        R%                  U SUR                  R                   S[        UR                  5       5       35        UR'                  XR                  R                  SXU   UR                  5       5        M     U$ )z2Returnas IO binding object for a session.
        r'   r   r   rV   rU   z device type=z shape=)
io_bindingis_contiguous
bind_inputr   typer   longlongrF   r   data_ptrget_outputsr   r   rJ   r   r   r#  r   r   bind_output)r%  r'   rU   rV   r[   r   r   r+  r   r   rP   r'  r0  outputr   output_buffers                   r   prepare_io_bindingGpt2Helper.prepare_io_binding  s    !++-
 &&((((k9+;+;+@+@!U^^UYZcZhZhZjUk'002	4 ##:#:#<Q#?#D#DEKK	&/5==&@U]]emm
&t_	++----!??,q=  )113H%%aSk6==3E3Eq*VZ[a[f[f[hVikst - %!//1111!!"2N4I4I4N4NPQS]"&~':':'<"=~?V?V?XZ #--////!!.,2E2E2J2JAu~~"&|'8'8':";\=R=R=TV "--/F ++K*7MLLK=m6J6J6O6O5PPWX\]j]o]o]qXrWstu"";0D0D0I0I1jhsZt#0#9#9#;=	 0 r    c                 v   / nU R                  5        H  nUR                  nX   nX&   nUS[        R                  " U5       R	                  U5      R                  5       R                  5       n	U(       a/  UR                  U	R                  5       R                  5       5        M  UR                  U	5        M     U$ )z<Copy results to cpu. Returns a list of numpy array.
        r   )	r1  r   r   r   reshapeclonedetachrI   r   )
r%  r   r   return_numpyr   r3  r   r   r   copy_tensors
             r   "get_outputs_from_io_binding_buffer-Gpt2Helper.get_outputs_from_io_binding_buffer  s     !--/F ++K#0F!.E 5::e#45==eDJJLSSUK"";??#4#:#:#<="";/ 0 r    r   r   r;  include_copy_output_latencyc           	         [         R                  S5        [        R                  XR                  UR
                  UR                  UR                  X#5      nU R                  U5        [        R                  XUU5      nUS:X  a  U$ / n	[        U5       Hn  n
[        R                  " 5       nU R                  U5        U(       a  [        R                  XUU5      n
U	R                  [        R                  " 5       U-
  5        Mp     [        U	5      S-  [        U	5      -  n[         R                  SR                  [        US5      5      5        X4$ )z^Inference with IO binding. Returns outputs, and optional latency when total_runs > 0.
        z*start onnxruntime_inference_with_binded_ior   r  z2OnnxRuntime with IO binding inference time = {} msr  )r   r   r   r5  r'   rU   rV   r[   run_with_iobindingr=  rH   r  rI   r  rG   r  )r%  r  r   r   r  r;  r?  r+  r   r  r   r  r  s                r   $onnxruntime_inference_with_binded_io/Gpt2Helper.onnxruntime_inference_with_binded_io  s%    	AC  22;@P@PRXReRe393H3H&++Wev
 	&&z2 !CCKanDPR ?z"AIIKE**:6*AA+_lBNPNN499;./ # g,-G<IPPQWXginQopq++r    c                    UR                   n[        R                  SU SU SU SU SU SU S35        SnS	nS
nSnU(       a-  [        R	                  XXU5      n[        R                  UX#5      nSn[        U5       GH4  n[        R                  " SU5      n[        R                  " SU5      n[        R                  " SU5      n[        R                  SU SU S35        [        R                  UUUUR                  UR                  UR                  UR                  X#U	U
5      n[        R                  UU5      nU(       a  [        R!                  U U5      nO1[        R	                  UUUUU5      n[        R#                  U UUU5      n[        R%                  UUXES9nU(       d  GM/  US-  nGM7     [        R                  SU SU 35        USU-  :  a)  [        R                  S['        US-  U-  5      S S35        UU:H  $ )zTGenerate random inputs and compare the results of PyTorch and Onnx Runtime.
        zRunning parity test (rtol=z, atol=z, test_cases=z, use_io_binding=z model_class=z is_float16=z) ...      rB   Nr   rA   z#Running parity test for batch_size=z past_sequence_length=z...r   zParity Test Cases=z	; Passed=gffffff?zParity is good: passed rate=d   z.0f%)r   r   r   r   r   r   rH   r   r   r   r   r   r   rY   r   r  r(  rB  r   r   )r%  r   r   r   r   r   total_test_casesuse_io_bindingr   r   r   r   max_batch_sizemax_past_seq_lenmax_seq_lenr   max_output_shapespassed_test_casesr   r   r   r   r   r   r   r   r   s                              r   test_parityGpt2Helper.test_parity  sY    #\\(gdV=IYHZZklzk{  |I  JU  IV  Vb  cm  bn  ns  t	
  * < <^_j=H!J'::;LfaN'(A$nnQ<O#)>>!5E#F >:JLL5j\AWXlWmmpqs%66zCWYh7=7Q7QSYSeSegmgugu7=7H7H&^n7IKL
 !225,GG(>>{LY * < <ZI]_npv=H!J(MMk[giwN[] &55g{QU5aL|!Q&!/ )0 	()9(:)DUCVWXt&666KK6s;LS;PQa;a7bcf6gghij $444r    c                     [         R                  SSSUR                  UR                  UR                  UR
                  USUUS9R                  5       n[        R                  R                  X5      $ )z#JIT trace for TorchScript.
        rA   Fr   )
r   r   r   r   rY   r   ry   rJ   jittrace)r   r   r   r   r   rx   s         r   torchscriptGpt2Helper.torchscriptK  su      00AFGABEKE_E_=C=O=O;A>><B<M<M8>9>BRDV 1 
X Y`X_Xa 	 yyu11r    c           
         Un[         R                  " SU5      (       d>  [        R                  R	                  U5      (       d   e[        U5      R                  S   nUS:w  a  USU-   -  nU(       a  US-  nU(       Ga  [        R                  R                  [        R                  R                  X5      US-   5      [        R                  R                  [        R                  R                  XS-   5      US-   5      [        R                  R                  [        R                  R                  XS	-   5      US
-   5      [        R                  R                  [        R                  R                  XS-   5      US-   5      S.$ [        R                  R                  XS-   5      [        R                  R                  XS-   5      [        R                  R                  XS
-   5      [        R                  R                  XS-   5      S.$ )zFBuild a  path name for given model based on given attributes.
        z	^[\w_-]+$r   r	   r   _pastz.onnx_fp32z
_fp32.onnx_fp16z
_fp16.onnx_int8z
_int8.onnx)rawfp32fp16int8)rematchospathisdirr   partsjoin)
output_dirmodel_name_or_pathr   has_past
new_folder
model_names         r   get_onnx_pathsGpt2Helper.get_onnx_paths\  s    (
xx&89977==!344440177;J++#++J'!J ww||BGGLL$H*W^J^_RWW\\*7>R%SU_bnUnoRWW\\*7>R%SU_bnUnoRWW\\*7>R%SU_bnUno	  77<<
,@AGGLL,-FGGGLL,-FGGGLL,-FG	
 	
r    r)   N)FTT)r	   )F)MbP?rn  )FFTT)r   )T)r   TF)Fr   r   rG  Tr	   TT)TT)r	   TF)!r+   r,   r-   r.   r/   r]   r   rJ   r   boolrp   r   r
   r   r   r   r   r   r   r   r   r  r  r  r(  r5  r=  TensorrB  rP  rU  rl  r0   r)   r    r   r   r      s    */2648'IS 'I/2'I*-'I /2'I '*	'I
 %('I &)'I "''I #''I ,0'I .2'I >H'I 'IR 
 .?	c 03+. #- (+	 DHTRUYCW	 4 H H   $ $  .  %*5:-1/3K+%(K+ "K+ /3	K+
 '+K+ )-K+ K+Z  05M M* ( ( ( (4 ,: ,3 , ,@ - -^   
 @ABFQV&,5?&,=A#u||BS=T&, =Ad3i<P&, :=	&,
 <@&, KO&, &,P   %%(#' 1%)'+:5 :5x 2 2   +< $"'	 
$' 
  
r    r   )&rb  loggingrJ   r   r   r   r  r`  pathlibr   typingr   r   r   r   transformersr   r	   r
   r   benchmark_helperr   	getLoggerr+   r   PRETRAINED_GPT2_MODELSFLOAT32FLOAT16INT8DEFAULT_TOLERANCEr   r4   r<   r_   rf   r   rp   r   r)   r    r   <module>r|     s    
       	  + + L L &			8	$W &&	0A0A3	X[\ N9 N8[ 8E) E8E E 	@/ 	@ *8T:"=x!O|T2S S.x
 x
r    