+
    9ig                        ^ RI t ^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt^ RIt	^ RI
t
^ RIt^ RIt^ RIHtHt ^ RIHtHt ^ RIHtHtHtHtHtHt ^ RIHt ^ RIHtHtHt ^ RI H!t! ^ RI"H#t#H$t$H%t% ^ RI&t']PP                  ! ])4      t*R	 t+R
 R lt,R R lt-R t.R t/R t0R t1R t2R t3RR lt4R t5])R8X  d
   ]5! 4        R# R# )    N)measure_memorysetup_logger)get_rankget_size)add_io_bindings_as_ortvalues%get_merged_sample_with_past_kv_inputsget_msft_sample_inputsget_sample_inputsget_sample_with_past_kv_inputsverify_ort_inputs)ORTModelForCausalLM)ProfilerActivityprofilerecord_function)trange)
AutoConfigAutoModelForCausalLMAutoTokenizerc                    V P                   R9   d   ^ # V P                   R8X  d    \        VP                  4      # \        VP                  4       4      #   \         d#    \        TP                  P
                  4      u # i ; i)hf-pt-eagerhf-ort   r   hf-pt-compile)benchmark_typeleninputs_names	Exceptiondecoderinput_names
get_inputs)argsmodels   &&m/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/models/llama/benchmark.pyget_ort_model_inputs_lenr$   (   st    >>h&	2u))** u!""  	2u}}0011	2s   A *B Bc                D    V ^8  d   QhR\         P                  R\        /# )   r!   ort_model_inputs_len)argparse	Namespaceint)formats   "r#   __annotate__r,   5   s$     $ $X'' $s $    c                    R R r2V P                   R8X  d   RMV P                  P                  pV P                   R9   d   \        V P                  V P                  V P
                  V P                  RR7      p\        V P                  V P                  V P
                  V P                  V P                  RR7      pW#3# V P                   R9   Ed,   V^8X  d   \        V P                  V P                  V P
                  V P                  RR7      p\        V P                  V P                  V P
                  V P                  V P                  RR7      pW#3# \        V P                  V P                  V P
                  V P                  ^ VV P                  V P                  RRR7
      p\        V P                  V P                  V P
                  ^V P                  VV P                  V P                  RRR7
      p W#3# V P                   R8X  d   \        V P                  V P                  V P
                  V P                  ^ VV P                  V P                  R	RV P                  R
7      p\        V P                  V P                  V P
                  ^V P                  VV P                  V P                  R	RV P                  R
7      pW#3# V P                   R8X  d   V^8  p\        V P                  V P
                  ^ V P                  VV P                  V P                  VR7      p\        V P                  V P
                  V P                  ^VV P                  V P                  VR7      pW#3# \        R4      h)Nort-msfti   T)return_dict)use_fp16r0   pt)seq_lenpast_seq_lenmax_seq_lenr1   use_buffer_shareenginer0   ort-convert-to-onnxort)r3   r4   r5   r1   r6   r7   r0   
world_size)r4   r3   r5   r1   r6   split_kvz/Unable to auto-detect inputs for provided modelr   >   r   )r   configmax_position_embeddingsr
   target_device
batch_sizesequence_lengthr   r1   r   r6   r:   r	   r   )r!   r'   init_inputsiter_inputsr5   r;   s   &&    r#   r    r    5   s1   #T
 --;$AdAdK>>'KKOO  
 5KKOO  ]]
^ ##M 
		
	*1$+""$$ K 9""$$ Kx ##e @"",,'!%!6!6 K @""!11'!%!6!6 KL ##s 
		 5	5;KKOO((#]]!22
 <KKOO--#]]!22
T ##9 
		
	*'!+,KKOO((#]]!22	
 -KKOO--#]]!22	
 ## IJJr-   c                8    V ^8  d   QhR\         P                  /# )r&   r!   )r(   r)   )r+   s   "r#   r,   r,      s     R RH&& Rr-   c                 ~   R R r!R R rCV P                   R9   Ed    V P                  '       d   V P                  MV P                  p\        P                  ! 4       p\        P
                  ! TV P                  '       d   \        P                  M\        P                  V P                  V P                  RV P                  R7      P                  V P                  4      p\        P                  ! 4       pV P                   R8X  d   \        P                  ! V4      pMpV P                   R9   dH   \        P                   ! 4       pV P"                  Vn        V P&                  '       d   ^Vn        ^Vn        M\-        RV P                    24      hV P                   R8X  EdH   \/        V P0                  4      \2        J d   V P0                  ^ ,          MV P0                  p\/        V P0                  4      \2        J d   V P0                  ^,          MR pR pR p	\4        P6                  ! V P8                  4       F@  p
RV
9  g   RV
9   g   RV
9   d   K  R	V
9   g   V
R
8X  d   T
pRV
9   d   T
p	RV
9   g   K<  T
pT
p	KB  	  \        P                  ! 4       p\:        P
                  ! V P8                  TT	V P                  V P                  RVR
8X  d   RMR VVVR7
      p\        P                  ! 4       pV P                   R9   d   \<        P?                  RV P@                  PC                  V PD                  4       24       \        P                  ! 4       p\        PF                  ! V P@                  PC                  V PD                  4      VV P0                  .R7      p\        P                  ! 4       p\<        P?                  RWC,
           R24       V# )Nr   T)torch_dtypeuse_auth_tokentrust_remote_code	use_cache	cache_dirr   Cannot recognize z.onnxz
.onnx_dataz
.onnx.datadecoder_modelz
model.onnxdecoder_with_past_modeldecoder_merged_model)	decoder_file_namedecoder_with_past_file_namerF   rG   use_io_binding
use_mergedproviderprovider_optionssession_optionszLoading model from )	providerszLoaded model in  sr   >   r   r/   r8      r/   r8   )$r   hf_pt_dir_path
model_nametimer   from_pretrainedr1   torchfloat16float32authrI   tor>   compiler9   SessionOptionsr   enable_profilingverboselog_verbosity_levellog_severity_levelr   typeexecution_providertupleoslistdirhf_ort_dir_pathr   loggerinfoort_model_pathr+   rankInferenceSession)r!   r"   sess_options
start_timeend_timesourcerR   rS   rN   rO   filenames   &          r#   	get_modelrw      s   < >>(,(;(;(;$$YY[
$44)-EMM99"iinn
 "T
  	 99;/1MM%(E			 M	M))+(,%<<</0L,./L+ +D,?,?+@ABBh&15d6M6M1NRW1W4**1-]a]t]t9=d>U>U9VZ_9_42215ei &*#

4#7#78Hh&,(*BlV^F^(*h,.F$,!(H4.6+%1$,!.6+ 9 YY[
#33  /(C99"ii 1\ At-(
 99;AA)$*=*=*D*DTYY*O)PQRYY[
$$&&tyy1../

 99;
KK"8#8"9<=Lr-   c                 F  a  S P                   R9   d   \        S P                  4      M%\        S P                  \        P
                  RR7      pS P                  '       d   V! V4      p\        P                  V4       V 3R lpV 3R lpV F  pV! 4        V! V4       V! 4        K  	  ^ pS P                   R9   d   \        S P                  4      M%\        S P                  \        P
                  RR7      p	V	 FR  pV! 4        \        P                  ! 4       p
V! V4       V! 4        \        P                  ! 4       pWV
,
          ,          pKT  	  S P                   R9  d   \        P                  R4       VS P                  ,          pS P                  V,          pS P                  ^ 8X  dw   \        P                  RS P                   24       \        P                  RS P                   24       \        P                  R	V R
24       \        P                  RV R24       R# )r/   zWarm up)filedescc                     < SP                   R 8w  d,   SP                  R9   d   SP                  P                  4       # V3R l# )cpuc                     < SP                   R 8w  dC   \        P                  P                  4       '       d   \        P                  P	                  4       # R # )r|   c                      R # N kwargss   *r#   <lambda>=time_fn.<locals>.<lambda>.<locals>.<lambda>.<locals>.<lambda>      r-   devicer\   cudais_availablesynchronizer   r!   s   *r#   r   +time_fn.<locals>.<lambda>.<locals>.<lambda>  @    {{e#

(?(?(A(A JJ""$ &%&r-   rW   )r   r   
io_bindingsynchronize_inputsr   s   *r#   r   time_fn.<locals>.<lambda>  s>    ;;%D$7$7;^$^ 	**, 	

	
r-   c                     < SP                   R 8w  d,   SP                  R9   d   SP                  P                  4       # V3R l# )r|   c                     < SP                   R 8w  dC   \        P                  P                  4       '       d   \        P                  P	                  4       # R # )r|   c                      R # r   r   r   s   *r#   r   r   (  r   r-   r   r   s   *r#   r   r   %  r   r-   rW   )r   r   r   synchronize_outputsr   s   *r#   r   r   "  s>    ;;%D$7$7;^$^ 	++- 	

	
r-   	Benchmark zBatch Size: zSequence Length: z	Latency: rV   zThroughput: z tpsNrW   )r   rangewarmup_runsr   sysstdoutrd   rm   rn   num_runsrZ   r?   rp   r@   )r!   fninputswarmup_rangeoutputs
input_syncoutput_sync_
total_timebench_rangers   rt   latency
throughputs   f&&           r#   time_fnr     s    "EE 	dD$$3::IF  |||V*GJK 
6
  J "EE 	dmmDMM

E 
 YY[

6
99;++
  "EEB4==(G7*JyyA~l4??"345'(<(<'=>?iy+,l:,d34
r-   c                    R V P                    RV P                   RV P                  P                  4        RV P                   RV P
                   RVP                  P                  RR4       RV R\        P                  P                  4       R 2pRpV P                  R9   d   \        \        P                  \        P                  .RRR7      ;_uu_ 4       p\        R4      ;_uu_ 4        V! V4       RRR4       RRR4       XP                  ^R	7      P!                  V P"                  V P$                  R
7      p\&        P(                  P+                  V P,                  V R24      p\/        VR4      ;_uu_ 4       pVP1                  V4       RRR4       V# V! V4       V R2pV#   + '       g   i     L; i  + '       g   i     L; i  + '       g   i     T# ; i)b_sr   -z%Y-%m-%d_%H:%M:%SNT)
activitiesrecord_shapesprofile_memorymodel_inference)group_by_stack_n)sort_by	row_limitz.logwz.jsonr   )r?   r@   r   lower	precisionr   __name__replacedatetimenowr   r   CPUCUDAr   key_averagestablept_filter_bypt_num_rowsrj   pathjoin
log_folderopenwrite)	r!   r   r   inputs_typeprefixrv   prof	prof_datafs	   &&&&     r#   
profile_fnr   R  s    !D$8$8#94;N;N;T;T;V:WWXY]YgYgXhhijnjujuivvwxz  yD  yD  yL  yL  MP  RU  yV  xW  WX  Yd  Xe  ef  go  gx  gx  g|  g|  g~  P  fQ  RFH>>(,,.>.C.CDTXim
 
 !2336
 4

 %%q%9??HYHYeieueu?v	77<<F84A(C  AGGI ! O 	6
 XU#O 43
 
 !  Os0   F<-	F)	6F<>G)F94F<<G	G 	c                    aa \         P                  ! 4       p\        P                  ! V4      pVP	                  R R7       S! S4       V P
                  ^ 8X  dF   \        P                  RVP	                  RR7      \        P                  ! RR7      ,           R24       \        P                  ! 4        \        P                  P                  4        \        V P                  R8g  VV3R lR	7       \         P"                  P%                  4        R# )
g?)intervalzCPU usage: NF)logical%r|   c                     < S ! S4      # r   r   )r   r   s   r#   r   measure_fn.<locals>.<lambda>|  s	    r&zr-   )is_gpufunc)rj   getpidpsutilProcesscpu_percentrp   rm   rn   	cpu_countgccollectr\   r   empty_cacher   r   r   r   flush)r!   r   r   pidprocesss   &ff  r#   
measure_fnr   o  s    
))+CnnS!G%vJyyA~k'"5"5t"5"DvGWGW`eGf"f!gghij JJL	JJ4;;%/7IJ JJr-   c                   a V3R  lpTpV P                   R8X  d   V! V4       V! V4       V P                  '       Ed;   \        WVR4      pV P                   R8X  d   SP                  P                  P                  4       p\        P                  RV RV 24       \        P                  ! V\        P                  P                  V P                  V4      4       \        WVR4      pV P                   R8X  d   SP                  P                  P                  4       p\        P                  RV RV 24       \        P                  ! V\        P                  P                  V P                  V4      4       R# \        P                  R4       \        WV4       \!        WV4       \        P                  R	4       \        WV4       \!        WV4       R# )
c                    < S! R/ V B pV# )Nr   r   r   r   r"   s   & r#   
get_logits$run_hf_inference.<locals>.get_logits  s    /&/r-   r   promptr   	Renaming  to tokenN7
Evaluating `model(inputs)` step to get past_key_values5
Evaluating `model(inputs)` step with past_key_values)r   r   r   r   sessionend_profilingrm   warningrj   renamer   r   r   decoder_with_pastrn   r   r   )r!   rA   rB   r"   r   generate_fnnew_lognameold_lognames   &&&f    r#   run_hf_inferencer     s[   4 Ko-K K ||| KJ(*--//==?KNNY{m4}EFIIk277<<#MN KI(*1199GGIKNNY{m4}EFIIk277<<#MN KKJKD{+t+.
KKHID{+t+.r-   c                 ~  a a V V3R  lpV3R lpV3R lpS P                   R8w  d   TMTp/ pS P                  '       Ed   V! W4      w  r\        S WyR4      p
SP                  4       p\        P                  RV RV
 24       \        P                  ! V\        P                  P                  S P                  V
4      4       \        S 4      oV! W(4      w  r\        S W|R4      p
SP                  4       p\        P                  RV RV
 24       \        P                  ! V\        P                  P                  S P                  V
4      4       R# \        P                  R	4       V! W4      w  r\        S Wy4       \        S Wy4       \        P                  R
4       V! W(4      w  r\        S W|4       \        S W|4       R# )c                    < \        SV 4      p SP                  R 8w  dJ   \        SV SP                  \        SP                  4      SP
                  V4      w  r!\        SRV4       W!3# W3# )r|   r   )r   r   r   r*   rp   r6   setattr)r   kv_cache_ortvaluesr   r!   r"   s   && r#   prepare_ort_inputs-run_ort_inference.<locals>.prepare_ort_inputs  sj    "5&1 ;;%-Ivt{{C		ND<Q<QSe.*J D,
311))r-   c                 *   < SP                  V 4       R # r   )run_with_iobinding)r   r"   s   &r#   with_io_binding*run_ort_inference.<locals>.with_io_binding  s      ,r-   c                 ,   < SP                  R V 4      pV# r   )runr   s   & r#   without_io_binding-run_ort_inference.<locals>.without_io_binding  s    ))D&)r-   r|   r   r   r   r   Nr   r   )r   r   r   r   rm   r   rj   r   r   r   r   rw   rn   r   r   )r!   rA   rB   r"   r   r   r   r   r   ort_init_inputsr   r   ort_iter_inputss   f&&f         r#   run_ort_inferencer    sl   *-
 &*[[E%9/?QK|||.@.a+ {XN ))+;-tK=AB
		+rww||DOO[IJ $.@.a+ {WM ))+;-tK=AB
		+rww||DOO[IJ KKJK*<[*]'OD+/t[2
KKHI*<[*]'OD+/t[2r-   c                     V P                   R9   d   \        WW#4       R# V P                   R9   d   \        WW#4       R# \        RV P                    24      h)r   rJ   N>   r   r   r   rW   )r   r   r  r   )r!   rA   rB   r"   s   &&&&r#   run_inferencer    sN    HHK?			 C	C$[@+D,?,?+@ABBr-   c           
         \         P                  ! 4       pVP                  R R\        R. RMOR7       VP                  RR\        RRR7       VP                  R	R
RRRR7       VP                  RRR\        R. RNORR7       VP                  R\        RRR7       VP                  R\        RRR7       VP                  R\        RRR7       VP                  RRR R!7       VP                  R"R#R$R!7       TP                  R%R&\        \        P
                  P                  4       '       d   R'MR(R(R'.R)7       VP                  R*R+\        ^ R,7       VP                  R-R.\        ^R,7       VP                  R/R0\        ^
R,7       VP                  R1\        ^R,7       VP                  R2\        ^ R,7       VP                  R3\        ^R,7       VP                  R4RRR57       VP                  R6\        R7R8R7       VP                  R9\        R:R;R7       VP                  R<RRR57       VP                  R=\        \        P                  P                  R>4      R?R7       VP                  R@\        RRARBRC7       VP                  4       p\        P                  P                  VP                  4       \        P                  ! VP                  4       RDVP                   9   dO   \#        VREVP$                  P'                  4        RF24       VP(                  RG8X  d   VP(                  RHV /3Vn        VP                   R8X  d   VP*                  '       g   Q RI4       hVP                   RO9   d   VP,                  '       g   Q RJ4       hVP.                  P1                  RK4      Vn        VP2                  P1                  RK4      Vn        VP4                  RP9   g#   VP4                  R8X  d   VP$                  R(8X  d   RMRVn        VP6                  '       d<   \9        VP.                  4      ^8X  d   \9        VP2                  4      ^8X  g   Q RL4       hV# )Qz-btz--benchmark-typeTr   )rg   requiredchoicesz-mz--model-namez<Hugging Face name of model (e.g. 'meta-llama/Llama-2-7b-hf'))rg   r  helpz-az--authF
store_truez5Use Hugging Face authentication token to access model)defaultactionr	  z-pz--precisionfp32int4fp16zePrecision for model. For ONNX models, the model's precision should be set before running this script.)r  rg   r  r  r	  z--hf-pt-dir-pathr   zNPath to directory containing all PyTorch files (e.g. tokenizer, PyTorch model))rg   r  r	  z--hf-ort-dir-pathzhPath to directory containing all ONNX files (e.g. tokenizer, decoder_merged, decoder, decoder_with_past)z--ort-model-pathzPath to ONNX modelz-bz--batch-sizesz1 2)r  z-sz--sequence-lengthsz32 64 128 256 512z-dz--devicer   r|   )rg   r  r  z-idz--device-id)rg   r  z-wz--warmup-runsz-nz
--num-runsz--seedz--max-lengthz--num-return-sequencesz	--profile)r  r  z--pt-filter-byself_cpu_time_totalz"What to filter PyTorch profiler byz--pt-num-rowsi  z.Number of rows for PyTorch profiler to displayz	--verbosez--log-folder.zFolder to cache log filesz--cache-dirz./model_cachez-Cache dir where Hugging Face files are stored)rg   r  r  r	  r9   rh   ExecutionProviderCUDAExecutionProvider	device_idz,Please specify a path to `--hf-ort-dir-path`z+Please specify a path to `--ort-model-path` zOPlease provide only one (batch_size, sequence_length) combination for profiling)r   r   r   r/   r8   )r  int8r  r  rW   >   r  r  )r(   ArgumentParseradd_argumentstrr\   r   r   r*   rj   r   r   
parse_argsnprandomseedmanual_seedr   r   r   upperrh   rl   ro   batch_sizessplitsequence_lengthsr   r   r   )rp   parserr!   s   &  r#   get_argsr$    sA   $$&F

   K   hlAx  
 0t   ]	   w	   !	     
 #  
 **1133   }3B
oCC
lbA
sA6 S"=
0sAF U<H
s,AHl   c4Fvw
U<H
S"'',,s:KRmn
<   D IINN499	dii  ###*t{{/@/@/B.CCT,UV""&=='+'>'>d@S&TD# h&###S%SS#AA"""Q$QQ"''--c2D 1177<D ..$4469QVZVaVaejVjqw 	N
 |||4##$)c$2G2G.HA.M 	
]	
M Kr-   c                     \        4       p \        4       p\        V 4      p\        VP                  4       \
        P                  VP                  4       R \        P                  P                  n        Wn        Wn        \        P                  ! VP                   VP"                  VP$                  VP$                  R7      p\&        P                  ! VP                   VP"                  VP$                  VP$                  R7      pVP(                  R8w  d   RVP                   2MVP(                  pVP*                  R8H  p\-        VRV4       \-        VRV4       \-        VRV4       \-        VRV4       \/        V4      p\1        W'4      pVP2                  R9   d   \4        P6                  ! VP8                  P;                  VP                  4      R	R
7      p	\=        \?        R V	P@                  PB                  4      4      p
T;'       d&    \E        V
4      ^ 8  ;'       d    VP(                  R8g  p\-        VRV4       M\-        VRR	4       \F        PH                  ! VPJ                  VPL                  4       Fw  w  rVP                  ^ 8X  d   \
        P                  RV RV R24       \-        VR\O        V4      4       \-        VR\O        V4      4       \Q        W(4      w  r\S        W.W4       Ky  	  R# )T)rI   rF   rG   r|   zcuda:r  	tokenizerr<   r>   r1   F)load_external_datac                      V P                   R 8H  # )GroupQueryAttention)op_type)nodes   &r#   r   main.<locals>.<lambda>  s    T\\=R-Rr-   r6   z
Batch size = z and sequence length = z...r?   r@   NrW   )*r   r   r$  r   rd   rm   rn   __dict__r\   backendscudnn	benchmarkrp   r:   r   r[   rY   rI   r_   r   r   r   r   rw   r$   r   onnx
load_modelro   r+   listfiltergraphr+  r   	itertoolsproductr   r"  r*   r    r  )rp   r:   r!   r&  r<   r>   r1   r"   r'   
onnx_model	gqa_nodesr6   r?   r@   rA   rB   s                   r#   mainr:    sF   :DJD>D
KK%)ENN"I O--4>>$))_c_h_hI ''4>>$))_c_h_hF ,0;;%+?eDII;'T[[M~~'HD+y)D(F#D/=1D*h' dOE3D@ AA__T%8%8%?%?		%J_de
 RT^TdTdTiTijk	#SSI(:SSt{{e?S(*:;(%0 (1'8'89I9I4K`K`'a#
99>KK/*5L_L]]`ablC
O4'_)=>#-d#I d< (br-   __main__)r   )6r(   r   r   r6  loggingrj   r   rZ   numpyr  r1  r   r\   benchmark_helperr   r   dist_settingsr   r   llama_inputsr   r   r	   r
   r   r   optimum.onnxruntimer   torch.profilerr   r   r   tqdmr   transformersr   r   r   onnxruntimer9   	getLoggerr   rm   r$   r    rw   r   r   r   r   r  r  r$  r:  r   r-   r#   <module>rG     s      	   	 
      9 ,  4 E E  H H 			8	$
#$DRjCL:&;/|83vCBJ/=d zF r-   