+
    9iN                     D   ^ RI t ^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIH	t	 ^ RI
H
t
 ^ RIHt ^ RIt^ RIt^ RIt^ RIHtHt ]	 ! R R4      4       t]	 ! R R	4      4       tRR
 ltR tR tR tR tR tR tR tR tR tR t R t!R t"]#R8X  d   Rt$]"! 4        R# R# )    N)	dataclass)datetime)Path)generate_test_dataget_bert_inputsc                   ,   a  ] tR t^!t o V 3R ltRtV tR# )TestSettingc                   < V ^8  d   Qh/ S[ ;R&   S[ ;R&   S[ ;R&   S[ ;R&   S[;R&   S[;R&   S[;R&   S[ ;R&   S[ ;R	&   S[;R
&   S[ ;R&   S[ ;R&   S[;R&   # )   
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_length)intboolstr)format__classdict__s   "e/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/bert_perf_test.py__annotate__TestSetting.__annotate__!   s     O    O	 
 O  M    M    I  M    !   !       N__name__
__module____qualname____firstlineno____annotate_func____static_attributes____classdictcell__r   s   @r   r	   r	   !         r!   r	   c                   ,   a  ] tR t^2t o V 3R ltRtV tR# )ModelSettingc                   < V ^8  d   Qh/ S[ ;R&   S[ ;R&   S[ ;R&   S[ ;R&   S[;R&   S[ R,          ;R&   S[ R,          ;R&   S[;R	&   # )
r   
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelNinput_tuning_resultsoutput_tuning_results	mask_type)r   r   )r   r   s   "r   r   ModelSetting.__annotate__2   sp     O    	 
   N  *$  :%  N r!   r"   Nr#   r+   s   @r   r.   r.   2   r,   r!   r.   c                    ^ RI pVP                  V4       V'       d!   RVP                  4       9  d   \        R4       V'       d6   VR8X  d   RR.pM,VR8X  d   RR.pM VR	8X  d   RR.pMVR
8X  d   . ROpMRR.pMR.pVP	                  4       p	WYn        VP                  P                  V	n        Vf   VP                  P                  V	n        MV^ 8X  d   VP                  P                  V	n        MV^8X  d   VP                  P                  V	n        MoV^8X  d   VP                  P                  V	n        MLV^8X  d   VP                  P                  V	n        M)V^c8X  d   VP                  P                  V	n        MWIn        Ve   W9n        VP#                  W	VR7      p
V'       d   VR8X  d   RV
P%                  4       9   g   Q hMVR8X  d   RV
P%                  4       9   g   Q hMVR	8X  d   RV
P%                  4       9   g   Q hMeVR
8X  d0   RV
P%                  4       9   g   Q hRV
P%                  4       9   g   Q hM/RV
P%                  4       9   g   Q hMRV
P%                  4       9   g   Q hVeD   \'        V4      ;_uu_ 4       pV
P)                  \*        P,                  ! V4      4       RRR4       V
# V
#   + '       g   i     T
# ; i)r   NCUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProvidermigraphxMIGraphXExecutionProvidercudatensorrtTensorrtExecutionProvider)	providers)rB   r:   r=   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDORT_ENABLE_LAYOUTr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r0   r   r   r   rO   r   tuning_results_pathrD   execution_providerssess_optionssessionfs   &&&&&&&     r   create_sessionr_   >   s    ++L9+;3V3V3XX N	
 u#9;Q"R#+&# #:<R"S## $;<R"S56--/L&2#"-";";"J"JL'0;0R0R0a0a-	!Q	&0;0R0R0b0b-	!Q	&0;0R0R0c0c-	!Q	&0;0R0R0f0f-	!Q	&0;0R0R0d0d-	!R	'0;0R0R0a0a-0H-',@)**:Ob*cGu)W-B-B-DDDD#.'2G2G2IIII*g.C.C.EEEE#.'2G2G2IIII*g.C.C.EEEE*g.C.C.EEEE%)>)>)@@@@&%&&!&&tyy|4 ' N7N '& Ns   &J44K	c                    \         P                  \        P                  \         P                  \        P                  \         P                  \        P                  \         P
                  \        P                  /pW,          # N)torchfloat32npfloat16int32int64longlong)
torch_typetype_maps   & r   
numpy_typerk      sH    rzzrzzRXXR[[	H r!   c                 <   V P                  4        UUu/ uF+  w  r4V\        P                  ! V4      P                  V4      bK-  	  pppVP                  4        UUu/ uF+  w  r4V\        P                  ! V4      P                  V4      bK-  	  pppWV3# u uppi u uppi ra   )itemsrb   
from_numpyto)inputsoutputsdevicenamearrayinput_tensorsoutput_tensorss   &&&    r   create_input_output_tensorsrw      s    QWQ]Q]Q_`Q_+$T5++E255f==Q_M`RYR_R_RabRa;4dE,,U366v>>RaNb(( abs   1B1Bc           
         V P                  4       pVP                  4        FZ  w  rEVP                  VVP                  P                  ^ \        VP                  4      VP                  VP                  4       4       K\  	  VP                  4        FZ  w  rEVP                  VVP                  P                  ^ \        VP                  4      VP                  VP                  4       4       K\  	  V# r   )

io_bindingrm   
bind_inputrr   typerk   dtypeshapedata_ptrbind_output)sessru   rv   rz   rs   tensors   &&&   r   create_io_bindingr      s    "J%++-MMv||$LLOO	
 . ',,.MMv||$LLOO	
 / r!   c                     . p. pVP                   '       d   R MRp\        V4       F  w  rxV P                  W(4      p	VP                  V	4       / p
\	        \        V4      4       F  pW,          WV,          &   K  	  \        WV4      w  r\        WV4      pV P                  V4       \        P                  ! 4       pV P                  V4       \        P                  ! 4       V,
          pVP                  V4       K  	  WE3# )r@   cpu)r   	enumeraterunappendrangelenrw   r   run_with_iobindingtimeitdefault_timer)r]   
all_inputsoutput_namestest_settingresultslatency_listrr   _test_case_idrp   resultrq   iru   rv   rz   
start_timelatencys   &&&&             r   %onnxruntime_inference_with_io_bindingr      s    GL#+++VF!*:!6\2vs<()A'-yGO$ * )DFU[(\%&w~N
 	"":.))+
"":.&&(:5G$! "7$   r!   c                 l   \        V4      ^ 8  d'   V P                  V\        P                  ! V4      4       . p. p\	        V4       Fi  w  rV\
        P                  ! 4       pV P                  W&4      p\
        P                  ! 4       V,
          p	VP                  V4       VP                  V	4       Kk  	  W43# ry   )r   r   randomchoicer   r   r   r   )
r]   r   r   r   r   r   rp   r   r   r   s
   &&&       r   onnxruntime_inferencer      s    
:L&--
";<GL!*:!6))+
\2&&(:5vG$ "7   r!   c                    VP                  4       pR \        P                  P                  V 4       R2pVRVP                   RVP
                   R2P                  RR4      ,          pVRVP                   RVP                   R2,          pVRVP                   R	VP                   R2,          pVR
VP                   RVP                   R2,          pVRVP                   R2,          pVRVP                   2,          pV# )zmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerO   r   replacer   r   r   r   r   r   r   r   )r0   r]   r   r\   options   &&&  r   	to_stringr      s9   ..0Lbgg&&z2315F
),*O*O)PPfgs  hI  hI  gJ  JK  L  T  T%r F L3344ElFbFbEccdeeF
L334LAXAX@YYZ[[F
--..>|?Z?Z>[[\]]F
()M)M(NaPPF
'(K(K'LMMFMr!   c           
         \        V P                  VP                  VP                  VV P                  VP
                  V P                  R 7      pVP                  4        Uu. uF  qfP                  NK  	  pp\        V P                  WQ4      pW9   d   \        RV4       R# \        RV4       . p	VP                  '       d=   \        VP                  4       F"  p
\        WSWq4      w  rV	P                  V4       K$  	  M;\        VP                  4       F"  p
\!        WSV4      w  rV	P                  V4       K$  	  \"        P$                  ! V	4      R,          p\&        P(                  ! V4      p\"        P*                  ! V^24      p\"        P*                  ! V^K4      p\"        P*                  ! V^Z4      p\"        P*                  ! V^_4      p\"        P*                  ! V^c4      pVP,                  RV,          ,          pVVVVVVV3W(&   \        RP/                  \/        VR4      \/        VR4      4      4       V P0                  '       d   \2        P4                  P7                  V P0                  4      p\2        P4                  P9                  V4      '       dR   TpVP;                  R^4      ^ ,           R	\<        P>                  ! 4       PA                  4        R2p\        R
VRVR4       VPC                  4       p\E        VR4      ;_uu_ 4       p\F        PH                  ! VV4       RRR4       \        RV4       R# R# u upi   + '       g   i     L%; i))r   rZ   zskip duplicated test:NzRunning test:  g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.json.zWARNING:zexists, will write tozinstead.wzTuning results is saved to)%r_   r0   r   r   r4   r   r5   get_outputsrs   r   rG   r   r   r   r   extendr   rd   rt   
statisticsmean
percentiler   r   r6   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrV   rX   dump)model_settingr   perf_resultsr   r   r]   outputr   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsr^   s   &&&&&                    r   run_one_testr      s     !..)>>G /6.A.A.CD.CFKK.CLD
M,,g
DC
%s+	/3"""//0B$I\%!G ##L1	 1 //0B$9'|$\!G##L1 1
 *+d2J ooj1Oz2.Jz2.Jz2.Jz2.Jz2.J((F_,DEJ 	L 
6==f_V[>\^deoqv^wx ***ggoom&I&IJ77>>+&&)O*11'1=a@A8<<>C[C[C]B^^cfK*o/FU_`((*+s##qIIc1 $*K8 +] El $#s   !L7L<<M	c                     \         P                  ! \        V VVVV3R 7      pVP                  4        VP	                  4        R# ))targetargsN)multiprocessingProcessr   startjoin)r   r   r   r   r   processs   &&&&& r   launch_testr   7  s=    %% 
	G MMOLLNr!   c                    VP                   e   \        V VVVVP                   4       R # \        P                  ! RR7      p\        P                  ! RR7      p\	        WT04      p\        ^\        ^V4      4       F  pWv9  g   K  VP                  V4       K  	  VP                  RR7       V F  p\        WW#V4       K  	  R # )NF)logicalT)reverse)	r   r   psutil	cpu_countlistr   minr   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   s	   &&&&     r   run_perf_testsr   F  s    ((4--	
 	  /I$$T2Mm781c"m,-%$$Q' . 4( 1MK_` !2r!   c                    \        V P                  V P                  V P                  V P                  4      w  r4p\        R VP                   RVP                   RVP                   24       \        VP                  VP                  VP                  VP                  VP                  VVVVP                  VP                  V P                  R7      p\        WW&4       R# )zGenerating z samples for batch_size=z sequence_length=)r7   N)r   r0   r1   r2   r3   rG   r   r   r   r   r   r   r   r   r7   r   )r   r   r   	input_idssegment_ids
input_maskr   s   &&&    r   run_performancer   ^  s    )8  $$&&%%	*&IJ 

l--..F|G^G^F__pq}  rN  rN  qO  	P $$$,,++))J =Ir!   c            	      
   \         P                  ! 4       p V P                  R R\        RR7       V P                  RRR\        RRR7       V P                  R	R
R\        RR7       V P                  RR\        ^
RR7       V P                  RRR\        ^ RR7       V P                  RR\        . R@O^cRR7       V P                  RR\        ^RR7       V P                  RRRRR7       V P                  RR7       V P                  RR\        ^. RAORR7       V P                  R RRR!R7       V P                  RR"7       V P                  R#RRR$R7       V P                  RR%7       V P                  R&R\        R'R(R7       V P                  R)R*R\        R'R+R7       V P                  R,R\        R'R-R7       V P                  R.R\        R'R/R7       V P                  R0R\        R'R1R7       V P                  R2R'\        R3R47       V P                  R5R'\        R6R47       V P                  R7R8RB\        R9R47       V P                  R:R;RRR<R7       V P                  RR=7       V P                  R>R\        ^R?R7       V P                  4       pV# )Cz--modelTzbert onnx model path)requiredr|   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   r|   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesFz!number of samples to be generated)r   r|   defaultr   z-tz--test_timeszJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_levelzfonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 3 - layout, 99 - enable all.)r   r|   choicesr   r   z--seedzPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityz.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   r|   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerNzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   r|   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))r      r      c   )r   r   r   r      )argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   s     r   parse_argumentsr   {  s=   $$&F
	DsAWX
Z   /   0   Y    u   _   (	   &
=   eLyY
&
*U<Vfg
u-
(    ,   '   )   ,    B	   !@	   #8   "B   u5
|   DKr!   c                     \        4       p V P                  ^ 8X  d,   \        ^\        RV P                  ,          4      4      V n        V P
                  ^ 8:  d   V P                  V n        \        P                  ! 4       pVP                  4       p\        V P                  4      p\        V4      ^8  d   \        V4      ^8:  g   \        R4      h\        V P                  V P                   V P"                  V P$                  V P&                  V P(                  V P*                  V P,                  4      pV F  p\/        VV P                  V P                  V P                  V P0                  V P2                  V P4                  V P6                  V P8                  V P:                  V P<                  V P
                  V P>                  4      p\A        RV4       \C        WFV4       K  	  \E        VPG                  4       RR R7      p\H        PJ                  PM                  \O        V P                  4      PP                  RPS                  V P0                  '       d   RMR	R
PM                  \E        V4       Uu. uF  p\U        V4      NK  	  up4      V P                  \V        PX                  ! 4       P[                  R4      4      4      p	\]        V	RRR7      ;_uu_ 4       p
\^        P`                  ! V
RRR7      pRpV F  w  rVPc                  R4      pVfL   . ROpTPe                  V Uu. uF  qPc                  R4      ^ ,          NK  	  up4       VPg                  V4       V Uu. uF  p\S        VR4      NK  	  ppTPe                  V Uu. uF  qPc                  R4      ^,          NK  	  up4       VPg                  V4       K  	  RRR4       \A        RV	4       R# u upi u upi u upi u upi   + '       g   i     L2; i)r   r   z batch_size not in range [1, 128]ztest settingFc                     V ^,          # )r   r"   )xs   &r   <lambda>main.<locals>.<lambda>M  s	    qQRtr!   )r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorNr   =r   zTest summary is saved to)zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS))4r   r   maxr   samplesr   r   r   Managerdictsetr   r   	Exceptionr.   modelr1   r2   r3   r4   r5   r6   r7   r	   r   r   r   r   r   r   r   r   rG   r   sortedrm   r   r   r   r   parentr   r   r   r   strftimerV   csvwritersplitr   writerow)r   managerr   batch_size_setr   r   r   sorted_resultsr   summary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluess                    r   mainr#    s   D!aTDLL%8!9:##q('+';';$%%'G<<>L)N1$^)<)C:;; 

!!""	M %
"  LLOOLLMM%%IILL((''
  	nl+\B% %* L..0%^TN77<<TZZ(//\\\EuHHf^&<=&<c!f&<=>  LLN##O4		
L 
lD"	-	-ZZDN
 .CYYs^F @AQ@A##G,0;<1fQ&F<MMF;Fq773<??F;<'# !/ 
., 

$l37 >(  A =;' 
.	-s=   ,OAO& O>O&O
/O&  O! O&O&&O6	__main__)Nr   N)%r   r  rX   r   r   r   r   r   dataclassesr   r   pathlibr   numpyrd   r   rb   bert_test_datar   r   r	   r.   r_   rk   rw   r   r   r   r   r   r   r   r   r   r#  r$   __spec__r"   r!   r   <module>r*     s     
   	    !      > ! ! !    Rj ).!2! B9Ja0J:_DQ4h zHF	 r!   