+
    9iW                        ^ RI t ^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIHtH	t	 ^ RI
Ht ^ RIHt ^ RIHt ^ RIHt ^ RIHt ^ RIt^ RIt^ RIt^ RIHt ^ RIt]P0                  ! ]4      t ! R	 R
]4      t ! R R]4      t ! R R4      tR]P<                  /tRRR'RRR/ 3R lt R(R lt!R)R lt"R t#R t$R t%R t&R*R lt']PP                  ^ 3R lt)R t*R+R lt+R R lt, ! R R ]4      t- ! R! R"]-4      t. ! R# R$]-4      t/R,R% lt0R& t1R# )-    N)ABCabstractmethod)ThreadPoolExecutor)datetime)Enum)sleep)Any)versionc                   6   a  ] tR t^t o RtRtRtRtR tRt	V t
R# )	Precisionfp32fp16int8int4c                    V P                   # Nvalueselfs   &g/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/benchmark_helper.py__str__Precision.__str__%       zz     N)__name__
__module____qualname____firstlineno__FLOAT32FLOAT16INT8INT4r   __static_attributes____classdictcell____classdict__s   @r   r   r      s#     GGDD r   r   c                   2   a  ] tR t^)t o RtRtRtR tRtV t	R# )OptimizerInfono_optby_ort	by_scriptc                    V P                   # r   r   r   s   &r   r   OptimizerInfo.__str__0   r   r   r   N)
r   r   r   r    NOOPTBYORTBYSCRIPTr   r%   r&   r'   s   @r   r*   r*   )   s       EEH r   r*   c                   2   a  ] tR t^4t o R tR tR tRtV tR# )ConfigModifierc                    Wn         R # r   
num_layers)r   r7   s   &&r   __init__ConfigModifier.__init__5   s    $r   c                   V P                   f   R # \        VR4      '       d4   V P                   Vn        \        P	                  RV P                    24       \        VR4      '       d4   V P                   Vn        \        P	                  RV P                    24       \        VR4      '       d6   V P                   Vn        \        P	                  RV P                    24       R # R # )Nnum_hidden_layersz6Modifying pytorch model's number of hidden layers to: encoder_layersz7Modifying pytorch model's number of encoder layers to: zdecoder_layers z7Modifying pytorch model's number of decoder layers to: )r7   hasattrr;   loggerinfor<   decoder_layers)r   configs   &&r   modifyConfigModifier.modify8   s    ??"6.//'+F$KKPQUQ`Q`Pabc6+,,$(OOF!KKQRVRaRaQbcd6,--$(OOF!KKQRVRaRaQbcd .r   c                    V P                   # r   r6   r   s   &r   get_layer_numConfigModifier.get_layer_numE   s    r   r6   N)	r   r   r   r    r8   rB   rE   r%   r&   r'   s   @r   r4   r4   4   s     %e r   r4   float32TFc	                 ^   \         P                  ! 4       p	V'       d!   \         P                  P                  V	n        M\         P                  P
                  V	n        V'       d   R V	n        V^ 8  d)   WIn        \        P                  RV	P                   24       V'       d	   ^ V	n
        M^V	n
        V\         P                  ! 4       9   d   V.p
MMV'       dC   VR8X  d   RR.p
M9VR8X  d   RR.p
M-VR8X  g   Vf   R	R.p
MVR
8X  d   . ROp
M\        RV 24      hR.p
V'       d"   V
 Uu. uF  qV9   d   WV,          3MTNK  	  p
pV'       d   V	P                  RR4       Rp \         P                  ! W	V
R7      pV# u upi   \         d     \        P!                  RT  RT
 24        T# i ; i)Tz%Session option: intra_op_num_threads=dmlDmlExecutionProviderCPUExecutionProvidermigraphxMIGraphXExecutionProvidercudaNCUDAExecutionProvidertensorrtz)The execution provider is not supported: z(mlas.enable_gemm_fastmath_arm64_bfloat161)	providerszFailed to create session for z with providers=)TensorrtExecutionProviderrO   rK   )onnxruntimeSessionOptionsGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_ENABLE_BASICenable_profilingintra_op_num_threadsr>   debuglog_severity_levelget_available_providersRuntimeErroradd_session_config_entryInferenceSession	Exception	exception)onnx_model_pathuse_gpuproviderenable_all_optimizationnum_threadsrZ   verbose(enable_mlas_gemm_fastmath_arm64_bfloat16provider_optionssess_optionsrR   namesessions   &&&&&&&&&    r   create_onnxruntime_sessionro   O   s    --/L0;0R0R0a0a-0;0R0R0c0c-(,%Q,7)<\=^=^<_`a*+'*+';6688J		u/1GHI#+&I 8#302HII#I !J8*UVV+,	fopfo^b?O7OdT23UYYfo	p/--.XZ]^Gg..Xab N q  g88IIYZcYdefNgs   )E=#F &F,+F,c                    V '       d)   \         P                  ! R \         P                  R7       R# \         P                  ! R\         P                  R7       \         P                  ! R4      P                  \         P                  4       R# )z8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)formatlevelz%(message)stransformersN)loggingbasicConfigDEBUGINFO	getLoggersetLevelWARNING)ri   s   &r   setup_loggerr{      sP    M--	

 	=E.)227??Cr   c                 J   V '       d<   \         P                  P                  V 4      '       g   \         P                  ! V 4       V'       d<   \         P                  P                  V4      '       g   \         P                  ! V4       V'       df   VR 8X  d#   R\        P
                  ! 4       9   g   Q R4       hM<\        \        P
                  ! 4       4      P                  RR.4      '       d   Q R4       h\        P                  R\        P                   24       \        P                  R\        P                   24       \        P                  R\        P                   24       \        P                  ! \        P                  4      \        P                  ! R	4      8  g   Q h\        P                  ! \        P                  4      \        P                  ! R
4      8  g   Q h\        P                  ! \        P                  4      \        P                  ! R	4      8  g   Q hR# )rI   rJ   zBPlease install onnxruntime-directml package to test GPU inference.rO   rM   zSPlease install onnxruntime-gpu package, or install migraphx, to test GPU inference.zPyTorch Version:zTransformers Version:zOnnxRuntime Version:z1.10.0z4.12.0N)ospathexistsmakedirsrT   r^   set
isdisjointr>   r?   torch__version__rs   r
   parse)	cache_dir
output_dirre   rf   s   &&&&r   prepare_environmentr      sw   	22
I"''..44
Ju)[-P-P-RR TR
 ;>>@ALL(*EF  ede  KK"5#4#4"567
KK'(@(@'ABC
KK&{'>'>&?@A ==**+w}}X/FFFF==112gmmH6MMMM==001W]]85LLLLr   c                    \        V 4      \        \        V 4      4      ,          R ,          p\        P                  ! V \        P
                  R7      R ,          pVR V,          ,          pR\        V 4      RVR R\        P                  ! V ^Z4      R ,          R R\        P                  ! V ^_4      R ,          R R\        P                  ! V ^c4      R ,          R RVR R	VR /# )
g     @@)dtype
test_timeslatency_variancez.2flatency_90_percentilelatency_95_percentilelatency_99_percentileaverage_latency_msQPS)sumfloatlennumpyvarfloat64
percentile)latency_list
batch_size
latency_msr   
throughputs   &&   r   get_latency_resultr      s    \"U3|+<%==FJyyU]]CfLv
23J 	c,'/4E$4$4\2$F$OPS#TE$4$4\2$F$OPS#TE$4$4\2$F$OPS#TC 0*S! r   c                 &   \        VR RRR7      ;_uu_ 4       p. ROp\        P                  ! W#R7      pVP                  4        V  F  pVP	                  V4       K  	  RRR4       \
        P                  RV 24       R#   + '       g   i     L*; i)a asciimodenewlineencoding
fieldnamesNz&Detail results are saved to csv file: )enginer
   rR   device	precision	optimizer
io_binding
model_nameinputsthreadsr   sequence_lengthcustom_layer_numr   r   r   r   r   r   r   r   )opencsv
DictWriterwriteheaderwriterowr>   r?   )resultscsv_filenamecsv_filecolumn_names
csv_writerresults   &&    r   output_detailsr      sv    	lb7	C	Cx
0 ^^HF
 F' 7 
D< KK8GH= 
D	Cs   AB  B	c                    \        VR RRR7      ;_uu_ 4       p. ROp. pVP                   FU  pVP                  R.8X  d   VP                  R	V 24       K+  VP                   F  pVP                  R	V R
V 24       K  	  KW  	  \        P
                  ! W4V,           R7      pVP                  4        VP                   EF  p	R EF}  p
VP                   EFh  pR EF]  pVP                   EFH  p/ pV  EF  pVR,          V	8X  g   K  VR,          V
8X  g   K$  VR,          V8X  g   K4  VR,          V8X  g   KD  VR,          V8X  g   KT  VP                  4        UUu/ uF  w  ppVV9   g   K  VVbK  	  pppV'       g8   VP                  V4       VP                  \        P                  VR4      4       M V F  pVV,          VV,          8X  d   K  Q h	  VR,          pVR,          pV'       d   VR,          VR	V R
V 2&   EK  VR,          VR	V 2&   EK"  	  V'       g   EK7  VP                  V4       EKK  	  EK`  	  EKk  	  EK  	  EK  	  RRR4       \        P!                  RV 24       R# u uppi   + '       g   i     L0; i)r   r   r   r   r   r   r   r   r   b_sr   r   r   r   Nz'Summary results are saved to csv file: )r   r   r   r   r
   rR   r   r   r   r   r   )         )TFr   )r   batch_sizessequence_lengthsappendr   r   r   modelsenginesrh   itemsupdatedictfromkeysr   r>   r?   )r   r   argsr   header_names
data_namesr   r   r   r   input_countengine_namer   r   rowr   kvheadersr   ss   &&&                  r   output_summaryr      s4   	lb7	C	Cx
 
**J$$,!!Aj\"23'+'<'<O%%*R7H&IJ (=	 + ^^H
9RS
 ++J(#'<<K&7
'+'7'7G"$C*1$*<$8J$F(.x(8K(G(.x(8K(G(.|(<
(J(.y(9W(D@F.d1RSWcRctq!tG.d+.(+

7(;(+

4==R3P(Q1=A36q6WQZ3G,G3G 2>(.|(<A(./@(AA'(<BCW<Xas"QCL(97=>R7SasG) +2*  #s * 3 3C 81 (8 '8 $0  ) &1 
Dl KK9,HI! /eM 
D	CsU   C)I3I3I3&I36I3I3I-*I-1AI3I3-.I3!*I3-I33J	c                 l   \        VR RRR7      ;_uu_ 4       p. RO\        \        \        V P	                  4       4      4      P                  4       4      Op\        P                  ! W#R7      pVP                  4        V  F~  p\        \        P                  ! 4       4      W,          R&   \        P                  W,          R&   \        P                  W,          R&   WPV,          R&   VP                  W,          4       K  	  R	R	R	4       \         P#                  R
V 24       R	#   + '       g   i     L*; i)r   r   r   r   model_filenamer   rs   r   r   Nz(Fusion statistics is saved to csv file: )r   r   rs   r   )r   listnextitervalueskeysr   r   r   strr   nowrs   r   r   r   r>   r?   )model_fusion_statisticsr   r   r   r   keys   &&    r   output_fusion_statisticsr   &  s    	lb7	C	Cx

 $t3::<=>CCEF
 ^^HF
 *C7:8<<>7J#(4;G;S;S#(8494E4E#(1=@C()9: 7 <= + 
D  KK:<.IJ! 
D	Cs   C)D##D3	c                   a a / p\         P                  ! VV 3R  l^VR7       \         P                  ! VV 3R l^VR7      pVP                  V4       VP                  RR/4       VP                  \        Wt4      4       V# )c                  (   < SP                  R S 4      # r   run
ort_inputsort_sessions   r   <lambda>inference_ort.<locals>.<lambda><  s    +//$
;r   numberrepeatc                  (   < SP                  R S 4      # r   r   r   s   r   r   r   =  s    z)Jr   r   F)timeitr   r   r   )r   r   result_templaterepeat_timesr   warm_up_repeatr   r   s   ff&&&&  r   inference_ortr   :  sb    F
MM;An]==!JST]ijL
MM/"
MM<'(
MM$\>?Mr   c           
        a a / pS P                  4       oV F  p\        P                  ! W,          4      P                  V	4      p\        P                  \        W,          P                  4      V
4      pSP                  VVP                  P                  ^ VVP                  VP                  4       4       K  	  \        V4      ^ 8X  d   \        WgV	4       \        V4       Fk  w  ppSP!                  VVV,          P                  P                  ^ \"        P$                  VV,          P                  VV,          P                  4       4       Km  	  \&        P(                  ! VV 3R l^VR7       \&        P(                  ! VV 3R l^VR7      pVP+                  V4       VP+                  RR/4       VP+                  \-        VV4      4       V# )r   c                  &   < SP                  S 4      # r   run_with_iobindingr   r   s   r   r   /inference_ort_with_io_binding.<locals>.<lambda>q      ..z:r   r   c                  &   < SP                  S 4      # r   r   r   s   r   r   r   w  r   r   r   T)r   r   
from_numpytoIO_BINDING_DATA_TYPE_MAPgetr   r   
bind_inputr   typeshapedata_ptrr   allocateOutputBuffers	enumeratebind_outputr   rG   r   r   r   r   )r   r   r   r   ort_output_namesort_outputsoutput_buffersoutput_buffer_max_sizesr   r   	data_typer   r   rm   np_input
input_typeiort_output_namer   r   s   f&&&&&&&&&&&       @r   inference_ort_with_io_bindingr  D  s|    F '')J##J$4588@-11#j6F6L6L2MyY
OO  NN	
  >anvN'(89?1$$))MMN  1&&(	
 : MM: ==:L
 MM/"
MM<&'
MM$\:>?Mr   c           	          V F9  pV P                  \        P                  ! V\        P                  VR 7      4       K;  	  R# ))r   r   N)r   r   emptyrG   )r  r  r   r  s   &&& r   r  r    s-     %ekk!5==PQ %r   c                   \         P                  ! V 4       \        P                   P                  V 4       \        P                  ! V 4       \        P
                  P	                  V 4       \        P
                  P                  V 4       R# )z5Set random seed manually to get deterministic resultsN)randomseedr   r   manual_seedrN   manual_seed_all)r  s   &r   set_random_seedr    sR    
KK	LLd	d	JJ4 	JJt$r   c                j    V ^8  d   QhR\         \        \        \        3,          ,          R,          /# r   returnNr   r   r   r	   )rq   s   "r   __annotate__r    s%     # #d4S>*T1 #r   c                     ^ RI Hp HpHpHpHpHpHp  V! 4        . pV! 4       p\        V\        4      '       g   R# \        V4       Fo  p	V! V! V	4      4      p
\        V
\        4      '       d    R# VP                  RV	RV! V! V	4      4      RV
P                  RV
P                  RV
P                  /4       Kq  	  V! 4        V#   T  d   p\!        RT4        Rp?R# Rp?ii ; i)	r   	NVMLErrornvmlDeviceGetCountnvmlDeviceGetHandleByIndexnvmlDeviceGetMemoryInfonvmlDeviceGetNamenvmlInitnvmlShutdownNidrm   totalfreeused-Error fetching GPU information using nvml: %s)py3nvml.py3nvmlr"  r#  r$  r%  r&  r'  r(  
isinstanceintranger   r   r*  r+  r,  print)r"  r#  r$  r%  r&  r'  r(  r   device_countr  r?   errors               r   get_gpu_infor5    s      
)+,,,|$A*+Ea+HID$$$MM!-.H.KLTZZDIIDII	 % 	 =uEs#   &C 3C 1AC C!
CC!c                   L   a  ] tR tRt o RR ltR t]V 3R lR l4       tRtV t	R# )	MemoryMonitori  c                    Wn         R # r   keep_measuring)r   r:  s   &&r   r8   MemoryMonitor.__init__  s    ,r   c                    ^ RI p^ p \        W!P                  \        P                  ! 4       4      P                  4       P                  R,          4      p\        R4       V P                  '       d   Kl   V# )r   N{Gzt?   )	psutilmaxProcessr}   getpidmemory_inforssr   r:  )r   r?  	max_usages   &  r   measure_cpu_usageMemoryMonitor.measure_cpu_usage  sW    	I~~biik'B'N'N'P'T'TW^'^_I%L&&&r   c                T   < V ^8  d   QhRS[ S[S[S[3,          ,          R,          /# r  r  )rq   r(   s   "r   r  MemoryMonitor.__annotate__  s&     $ $4S#X#7$#> $r   c                    \        4       hr   )NotImplementedErrorr   s   &r   measure_gpu_usageMemoryMonitor.measure_gpu_usage  s    !##r   r9  NT)
r   r   r   r    r8   rF  r   rL  r%   r&   r'   s   @r   r7  r7    s#     -	 $ $r   r7  c                   H   a a ] tR tRt oRV 3R lltV3R lR ltRtVtV ;t# )CudaMemoryMonitori  c                &   < \         SV `  V4       R # r   )superr8   )r   r:  	__class__s   &&r   r8   CudaMemoryMonitor.__init__  s    (r   c                T   < V ^8  d   QhRS[ S[S[S[3,          ,          R,          /# r  r  )rq   r(   s   "r   r  CudaMemoryMonitor.__annotate__  s&     + +4S#X#7$#> +r   c           
     4   ^ RI HpHpHpHpHpHpHp . p. p	 V! 4        V! 4       p
\        V
\        4      '       g   \        P                  RV
 24       R# \        V
4       Uu. uF  p^ NK  	  pp\        V
4       Uu. uF  q! V! V4      4      NK  	  p	p \        V
4       Fg  pV! V! V4      4      p\        V\        4      '       d   \        P                  RV 24        R# \        W,          VP                  R
,          4      W&   Ki  	  \!        R4       V P"                  '       d   K   T! 4        \        T
4       Uu. uF  pRTRY,          RY,          /NK  	  up# u upi u upi u upi   T d"   p\        P                  R	T4        Rp?R# Rp?ii ; i)r   r!  z*nvmlDeviceGetCount result is not integer: Nz%nvmlDeviceGetMemoryInfo returns str: r=  	device_idrm   max_used_MBr-  r>  )r.  r"  r#  r$  r%  r&  r'  r(  r/  r0  r>   r4  r1  r   r@  r,  r   r:  )r   r"  r#  r$  r%  r&  r'  r(  max_gpu_usagegpu_namer3  r  r?   r4  s   &             r   rL  #CudaMemoryMonitor.measure_gpu_usage  s   	
 	
 	
 	J-/LlC00I,XY(-l(;<(;1Q(;M<RWXdRefReQ)*DQ*GHReHf|,A23Ma3PQD!$,,'LTF%ST#'*=+;TYY=P'QM$ - e***N |, -A	  HK!=#3
 -  =f  	LLH%P	sU   <E/ E/ #E .E/ =E%AE/ #AE/ +E/  E*E/  E/ /F6FFr   rN  )	r   r   r   r    r8   rL  r%   r&   __classcell__rS  r(   s   @@r   rP  rP    s     )+ + +r   rP  c                   B   a a ] tR tRt oRV 3R lltR tR tRtVtV ;t	# )RocmMemoryMonitori  c                X  < \         SV `  V4       R p\        P                  P	                  V4      '       d5   V\
        P                  9  d    \
        P                  P                  V4        ^ RIpW0n        V P                  P                  4        R#   \         d    RT n         R# i ; i)z/opt/rocm/libexec/rocm_smiN)
rR  r8   r}   r~   r   sysr   rocm_smiinitializeRsmiImportError)r   r:  rocm_smi_pathrc  rS  s   &&  r   r8   RocmMemoryMonitor.__init__  sw    (477>>-((CHH,.	!$MMM((* 	! DM	!s   -$B B)(B)c                    V P                   f   R# V P                   P                  VR4      ^ ,          R,          R,          # )NVRAMi   )rc  
getMemInfo)r   devs   &&r   get_used_memory!RocmMemoryMonitor.get_used_memory  s7    == I}}''V4Q7$>EEr   c           
     0   V P                   f   R # V P                   e$   \        V P                   P                  4       4      M^ p\        V4       Uu. uF  p^ NK  	  pp\        V4       Uu. uF  pRV 2NK
  	  pp \        V4       F&  p\	        W2,          V P                  V4      4      W2&   K(  	  \        P                  ! R4       V P                  '       d   K`   \        T4       Uu. uF  pRTRYB,          RY2,          /NK  	  up# u upi u upi u upi )NGPUr=  rX  rm   rY  )	rc  r   listDevicesr1  r@  rm  timer   r:  )r   r3  r  rZ  r[  s   &    r   rL  #RocmMemoryMonitor.measure_gpu_usage  s   == ;?==;Ts4==4467Z[$),$78$7q$78',\':;':!c!I':;<(#&}'79M9Ma9P#Q  )JJu&&& <(
 )	 Q}/
 )
 	
 9;
s   D	+D)D)rc  rN  )
r   r   r   r    r8   rm  rL  r%   r&   r]  r^  s   @@r   r`  r`    s     !F

 
r   r`  c           	          R pVR8X  d   \         pM\        pV! R4      pV '       EdN   Ve   TpMVP                  4       pVf   R # Vf   V# \        4       ;_uu_ 4       pV! 4       pVP	                  VP                  4      p VP	                  V4      p	V	P                  4       p
RVn        VP                  4       pVf    R R R 4       R # \        P                  RV RV 24       \        V4      ^8  dy   \        V4      ^8  di   \        V4      \        V4      8X  dP   ^ p\        V4       F2  w  rVR,          pW,          R,          pVV,
          p\        VV4      pK4  	  VuuR R R 4       # R R R 4       R # Ve   TpMVP                  4       pVf   V# \        4       ;_uu_ 4       pV! 4       pVP	                  VP                  4      p VP	                  V4      p	V	P                  4       p
RVn        VP                  4       p\        P                  RVR RVR R	24       W,
          uuR R R 4       #   RTn        TP                  4       pi ; i  + '       g   i     R # ; i  RTn        TP                  4       pi ; i  + '       g   i     R # ; i)
NrocmFzGPU memory usage: before=z  peak=rY  zCPU memory usage: before=z.1fz
 MB, peak=z MB)r`  rP  rL  r   submitr   r:  r>   r?   r   r  r@  rF  )is_gpufuncmonitor_typestart_memorymemory_monitor_typemonitormemory_before_testexecutor
mem_thread	fn_thread_rE  max_usedr  memory_beforebeforeafterr,  s   &&&&              r   measure_memoryr  ,  st   v//!%(Gv#!-!(!:!:!<%<%%!!X)+G!)B)BCJ0$OOD1	$$&).&&--/	  "! KK34F3GwykZ[%&!+I!0CL^H_cfgpcqHq(12D(E$A*=9F%L7E 6>D"8T2H	 )F
  - "!!.  )$668|!!			%'__W%>%>?
	, -I  "A%*G""))+I/0B3/GzR[\_Q``cde- 
	7 */&&--/	 "!. $ &+G""))+I 
		sO   #I!H0"IBI$#I<!I )<I<0I		II	 I99I<<J	c                      . ROp RpV  F=  p\         P                  ! V4      pVf   K  V'       d
   VR,          pW RV 2,          pK?  	  V# )ORT_DISABLE_FUSED_ATTENTIONr   ,=)r  !ORT_ENABLE_FUSED_CAUSAL_ATTENTION!ORT_DISABLE_FUSED_CROSS_ATTENTIONORT_DISABLE_TRT_FLASH_ATTENTION&ORT_DISABLE_MEMORY_EFFICIENT_ATTENTIONORT_TRANSFORMER_OPTIONSORT_CUDA_GEMM_OPTIONS)r}   getenv)	env_namesenvrm   r   s       r   get_ort_environment_variablesr  p  sU    I C		$=3JCq    Jr   rj  rN  r   )r   ){   )rN   N)2r   rt   r}   r  rb  rr  r   abcr   r   concurrent.futuresr   r   enumr   r   typingr	   r   r   rs   	packagingr
   rT   rx   r   r>   r   r*   r4   rG   r   ro   r{   r   r   r   r   r   r   longlongr  r  r  r5  r7  rP  r`  r  r  r   r   r   <module>r     s     	  
   # 1         			8	$ D  , u}}   -2AHDM8 ID7JtK(* nn:zR%#L$C $(/ /d(
 (
VA.Hr   