+
    9i(                    L   R t ^ RIt^ RIt^ RIt^ RIt^ RIt^ RIHt ^ RIH	t	 ^ RI
Ht ^ RIt^ RIt^ RIt^ RIHtHt ^ RIHt ^ RIHtHtHt ^ RIHt ^ R	IHtHtHtHtHtH t H!t!H"t" ^ R
I#H$t$H%t%H&t&H't' ^ RI(H)t* ^ RI+H,t, ^ RI-H.t/ ^ RI0H1t1H2t2 ]Pf                  ! R4      t4 ! R R]4      t5RTR R llt6R R lt7R R lt8RUR R llt9RUR R llt:R R lt;R R lt<R  R! lt=R" R# lt>RVR$ R% llt?R& R' lt@RWR( R) lltAR* tBR+ tCR, tDR- R. ltER/ R0 ltFR1 R2 ltGR3 R4 ltH^ . 3R5 R6 lltIR7 R8 ltJR9 R: ltKRXR; R< lltLR= R> ltMR? R@ ltNRA RB ltORURC RD lltPRURE RF lltQRG tR]5P                  3RH RI lltTRJ RK ltURL tVRYRM RN lltWRTRO RP lltXRZRQ RR llt)]YRS8X  d
   ])! 4        R# R# )[a  
This converts GPT2 or T5 model to onnx with beam search operator.

Example 1: convert gpt2 model with beam search:
    python convert_generation.py -m gpt2 --output gpt2_beam_search.onnx

Example 2: convert gpt2 model with beam search containing specific cuda optimizations:
    python convert_generation.py -m gpt2 --output gpt2_beam_search.onnx --use_gpu                       --past_present_share_buffer --use_decoder_masked_attention

Example 3: convert gpt2 model with beam search with mixed precision and enable SkipLayerNorm strict mode:
    python convert_generation.py -m gpt2 --output gpt2_beam_search.onnx --use_gpu -p fp16 --use_sln_strict_mode

Example 4: convert T5 model with beam search in two steps:
    python -m models.t5.convert_to_onnx -m t5-small
    python convert_generation.py -m t5-small --model_type t5                     --decoder_onnx ./onnx_models/t5-small_decoder.onnx                       --encoder_decoder_init_onnx ./onnx_models/t5-small_encoder.onnx          --output ./onnx_models/t5_small_beam_search.onnx

Example 5: convert T5 model with beam search. All in one step:
    python convert_generation.py -m t5-small --model_type t5 --output t5_small_beam_search.onnx

Example 6: convert T5 model with beam search containing specific cuda optimizations. All in one step:
    python convert_generation.py -m t5-small --model_type t5 --output t5_small_beam_search.onnx           --use_gpu --past_present_share_buffer --use_decoder_masked_attention

Example 7: convert MT5 model with external data file like mt5-base-beamsearch.onnx.data in below example.
    python convert_generation.py -m google/mt5-base --model_type mt5 --output mt5-base-beamsearch.onnx -e

Example 8: convert gpt2 model with greedy search:
    python convert_generation.py -m gpt2 --output gpt2_greedy_search.onnx --num_beams 1 --num_return_sequences 1

Example 9: convert gpt2 model with sampling:
    python convert_generation.py -m gpt2 --output gpt2_sampling.onnx --num_beams 1 --num_return_sequences 1 --top_p 0.6
N)Enum)Path)Any)	Precisionsetup_logger)NumpyHelper)
GraphProto
ModelProtoTensorProto	OnnxModel)
GPT2ConfigGPT2LMHeadModelGPT2Tokenizer	MT5ConfigMT5ForConditionalGenerationT5ConfigT5ForConditionalGenerationT5Tokenizer)GraphOptimizationLevelInferenceSessionSessionOptionsget_available_providers)main)PRETRAINED_GPT2_MODELS)export_onnx_models)PRETRAINED_MT5_MODELSPRETRAINED_T5_MODELS c                   2   a  ] tR t^Zt o RtRtRtR tRtV t	R# )GenerationTypebeam_searchgreedy_searchsamplingc                    V P                   # N)value)selfs   &i/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/convert_generation.py__str__GenerationType.__str___   s    zz     N)
__name__
__module____qualname____firstlineno__
BEAMSEARCHGREEDYSEARCHSAMPLINGr)   __static_attributes____classdictcell__)__classdict__s   @r(   r    r    Z   s     J"LH r+   r    c                h    V ^8  d   QhR\         \        ,          R,          R\        P                  /# )   argvNreturn)liststrargparse	Namespace)formats   "r(   __annotate__r@   c   s-     Q Q$s)d* Qh6H6H Qr+   c                   \         P                  ! 4       pVP                  R4      pVP                  RRR\        RRP                  \        \        ,           \        ,           4      ,           R7       VP                  RR	\        R
. RORRP                  . RO4      ,           R7       VP                  RR	\        \        P                  P                  RR4      RR7       VP                  RR	\        RRR7       VP                  RR	\        RRR7       VP                  RR	RRR7       VP                  R	R7       VP                  R4      pVP                  RR\        RR7       VP                  RR R	\        \        P                  P                  \        P                  P                  \        P                  P                  .R!R7       VP                  R"R#R	R$R%.R&R'7       VP                  R(R)R	RR*R7       VP                  R	R+7       VP                  R,R-R	RR.R7       VP                  R	R/7       VP                  R0R1R	RR2R7       VP                  R	R37       VP                  R4R5R	RR6R7       VP                  R	R77       VP                  R8R9R	RR:R7       VP                  R	R;7       VP                  R<R	RR=R7       VP                  R	R>7       VP                  R?4      pVP                  R@R	RRAR7       VP                  R	RB7       VP                  RCR	RRDR7       VP                  R	RE7       VP                  RFR	RRG7       VP                  R	RH7       VP                  RI\         R	^ RJRK7       VP                  RLR	RRMR7       VP                  R	RN7       VP                  ROR	RRPR7       VP                  R	RQ7       VP                  RRR	RRSR7       VP                  R	RT7       VP                  RUR	RRVR7       VP                  R	RW7       VP                  RXR	RRYR7       VP                  R	RZ7       VP                  R[R	RR\R7       VP                  R	R]7       VP                  R^R	RR_R7       VP                  R	R`7       VP                  Ra4      pVP                  Rb\         R	^RcRK7       VP                  Rd\         R	^2ReRK7       VP                  Rf\         R	^RgRK7       VP                  Rh\         R	^RiRK7       VP                  Rj\"        R	^RkRK7       VP                  Rl\"        R	^RmRK7       VP                  Rn\"        R	RoRpRK7       VP                  Rq\"        R	RoRrRK7       VP                  Rs\"        R	\#        Rt4      ) RuRK7       VP                  Rv\         R	^RwRK7       VP                  Rx\"        R	RyRzRK7       VP                  R{\         R	^ R|RK7       VP                  R}\         R	RR~RK7       VP                  R\         R	RRRK7       VP                  R\         R	RRRK7       VP                  R4      pVP                  RR	RRR7       VP                  R	R7       VP                  RR	RRR7       VP                  R	R7       VP                  RR	RRR7       VP                  R	R7       VP                  RR	RRR7       VP                  R	R7       VP                  RR	RRR7       VP                  R	R7       VP                  RR	\         ^RR7       VP                  RR	RRR7       VP                  R	R7       VP%                  V 4      pV# )zParse arguments

Args:
    argv (Optional[List[str]], optional): _description_. Defaults to None.

Returns:
    argparse.Namespace: Parsed arguments.
zInput optionsz-m--model_name_or_pathTzEPytorch model checkpoint path, or pretrained model name in the list: , )requiredtypehelpz--model_typeFgpt2z*Model type (default is gpt2) in the list: )rD   rE   defaultchoicesrF   --cache_dir.cache_modelsz%Directory to cache pre-trained models)rD   rE   rH   rF   z--decoder_onnxr   zLPath of onnx model for decoder. Specify it when you have exported the model.z--encoder_decoder_init_onnxzgPath of ONNX model for encoder and decoder initialization. Specify it when you have exported the model.z	--verbose
store_truezPrint more information)rD   actionrF   )verbosezOutput options--outputz,Output path for onnx model with beam search.z-p--precisionzTPrecision of model to run. fp32 for full precision, fp16 for half or mixed precisionz-b--op_block_list*autozDisable certain onnx operators when exporting model to onnx format. When using defaultvalue for gpt2 type of model fp16 precision, it will be set to ["Add", "LayerNormalization", "SkipLayerNormalization", "FastGelu"]. Other situation, it will be set to [])rD   nargsrH   rF   z-e--use_external_data_formatz!save external data for model > 2G)use_external_data_formatz-sz--run_shape_inferencezrun shape inference)run_shape_inferencez-dpvsz--disable_pad_vocab_sizezDo not pad logits MatMul weight to be a multiple of 8 along the dimension where dim value is the vocab size. The logits MatMul may hence be of poor performance for fp16 precision.)disable_pad_vocab_sizez-dsgdz,--disable_separate_gpt2_decoder_for_init_runzDo not create separate decoder subgraphs for initial and remaining runs. This does not allow for optimizations based on sequence lengths in each subgraph)*disable_separate_gpt2_decoder_for_init_runz-iz--disable_shared_initializerszdo not share initializers in encoder and decoder for T5 or in the init decoder and decoder for GPT2. It will increase memory usage of t5/mt5/gpt2 models.)disable_shared_initializersz--encoder_decoder_initzbAdd decoder initialization to encoder for T5 model. This is legacy format that will be deprecated.)encoder_decoder_initz6Beam search parameters that stored in the output modelz--output_sequences_scoreszoutput sequences scores)output_sequences_scoresz--output_token_scoreszoutput token scores)output_token_scoresz--early_stopping)rD   rN   )early_stoppingz--no_repeat_ngram_sizezNo repeat ngram size)rE   rD   rH   rF   z--vocab_maskz\Enable vocab_mask. This mask applies only to every generated token to filter some bad words.)
vocab_maskz--past_present_share_bufferzWUse shared buffer for past and present, currently work for gpt2 greedy/sampling search.)past_present_share_bufferz--use_decoder_masked_attentionzUses `DecoderMaskedSelfAttention` or `DecoderMaskedMultiHeadAttention` to optimize the decoding Attention computation. Must be used with `past_present_share_buffer`. Currently, only Attention head sizes of 32, 64 and 128 are supported.)use_decoder_masked_attentionz--prefix_vocab_maskzeEnable prefix_vocab_mask. This mask can be used to filter bad words in the first generated token only)prefix_vocab_maskz--custom_attention_maskz]Enable custom_attention_mask. This mask can be used to replace default encoder attention mask)custom_attention_maskz--presence_maskz!Presence mask for custom sampling)presence_maskz--seedzRandom seed for sampling op)seedzYBeam search parameters not stored in the output model, for testing parity and performancez--min_lengthzMin sequence lengthz--max_lengthzMax sequence lengthz--num_beamsz	Beam sizez--num_return_sequencesz&Number of return sequence <= num_beamsz--length_penaltyz<Positive. >1 to penalize and <1 to encourage short sentence.z--repetition_penaltyz-Positive. >1 to penalize and <1 to encourage.z--temperature      ?z6The value used to module the next token probabilities.z--top_pzTop P for samplingz--filter_valueInfzFilter value for Top P samplingz--min_tokens_to_keepzAMinimum number of tokens we keep per batch example in the output.z--presence_penalty        z%presence penalty for custom sampling.z--customz&If 1 customized top P logic is appliedz--vocab_sizezIVocab_size of the underlying model used to decide the shape of vocab maskz--eos_token_idzKcustom eos_token_id for generating model with existing onnx encoder/decoderz--pad_token_idzKcustom pad_token_id for generating model with existing onnx encoder/decoderz0Other options for testing parity and performancez--use_sln_strict_modez_Enable strict mode for SLN in CUDA provider. This ensures a better accuracy but will be slower.)use_sln_strict_mode	--use_gpuz)use GPU for inference. Required for fp16.)use_gpuz--disable_parityzdo not run parity test)disable_parityz--disable_perf_testzdo not run perf test)disable_perf_testz--torch_performanceztest PyTorch performance)torch_performancez--total_runsz4Number of times of inference for latency measurementz--save_test_dataz-save test data for onnxruntime_perf_test tool)save_test_data)rG   t5mt5)r=   ArgumentParseradd_argument_groupadd_argumentr<   joinr   r   r   ospathset_defaultsr   FLOAT32r&   FLOAT16intfloat
parse_args)r9   parserinput_groupoutput_groupmodel_groupbeam_parameters_group
test_groupargss   &       r(   parse_argumentsr   c   s    $$&F++O<KT
))*-AADYY
Z[   %9DIIF[<\\   S.14   [   %v   %	   &,,-=>L;	   !!''""(()*;*;*A*ABc   X  	 $0   u="   %8"b   U;6G   O'E   %@ q	   59++,deK#&	   U;"	   7/%UE2 #   k	   .%f	   u=(	   %@t	   u5!l	   590	   51*	   %("55c &&~C%YZav&w&&~C%Y[bw&x&&}3XY`k&l&& 5 '  &&K '  &&< '  &&E '  &&! '  &&u. '  &&P '  &&4 '  &&5 '  &&X '  &&Z '  &&Z '  **+]^Jn	   68	   E*%	   51#	   e4'	   e4C   <	   51T"DKr+   c                8    V ^8  d   QhR\         P                  /# r8   r   r=   r>   )r?   s   "r(   r@   r@     s     *) *)x)) *)r+   c                   V P                   pRVRV P                  RRV P                  RRRRR	.pV P                  '       d   VP	                  R
V P                  .4       V P
                  '       d   VP                  R4       V P                  '       d   VP                  R4       \        V P                  4      '       d.   VP	                  R.4       VP	                  V P                  4       V P                  \        P                  P                  8X  d   V P
                  '       g   Q R4       hV P                  '       d   \        P                  RV 24       \!        VR7       R# )zeConvert GPT-2 model to onnx

Args:
    args (argparse.Namespace): arguments parsed from command line
rB   rP   z--optimize_onnxrQ   z--test_runs1z--test_cases10z--overwriterJ   rk   rV   rR   zEfp16 or mixed precision model cannot run in CPU. Please add --use_gpuzarguments for convert_to_onnx:)r9   N)model_name_or_pathdecoder_onnx	precision	cache_dirextendrl   appendrW   lenop_block_listr   r|   r&   rO   loggerinfoconvert_gpt2_to_onnx)r   
model_name	argumentss   &  r(   gpt2_to_onnxr     s$    ((J 	I ~~~-89|||%$$$56
4+,-++,~~**000|||ddd|
 |||4YK@Ai(r+   c                8    V ^8  d   QhR\         P                  /# r   r   )r?   s   "r(   r@   r@   $  s     ! !X'' !r+   c                H   \        V P                  V P                  \        V P                  4      P
                  V P                  V P                  V P                  \        P                  P                  8g  V P                  RRRRRV P                  V P                  V P                  \        P                  P                  8H  R7      p\        P                  RV^ ,           24       \        P                  RV^,           24       V^ ,          V n        V^,          V n        R# )zbConvert T5 model to onnx

Args:
    args (argparse.Namespace): arguments parsed from command line
FT)r   r   
output_dirrl   rW   optimize_onnxr   rO   use_decoder_start_token	overwritedisable_auto_mixed_precisionuse_int32_inputs
model_typer\   force_fp16_iozonnx model for encoder: zonnx model for decoder: N)export_t5_onnx_modelsr   r   r   outputparentrl   rW   r   r   r|   r&   r   r\   r   debugencoder_decoder_init_onnxr   )r   pathss   & r(   
t5_to_onnxr   $  s     "22..$++!%!>!>~~):):)@)@@.. %%*??!66~~):):)@)@@E$ LL+E!H:67
LL+E!H:67%*1XD"aDr+   c                0    V ^8  d   QhR\         R\        /# )r8   	onnx_pathrW   r<   bool)r?   s   "r(   r@   r@   B  s     O Os Od Or+   c                    ^ RI Hp \        P                  ! V RR7      pVP	                  VRRR7      pV'       d   \
        P                  ! W@VR7       R# \        P                  R4       R# )	zShape inference on an onnx file, which will be overwritten.

Args:
    onnx_path (str): Path of onnx model
    use_external_data_format(bool): output tensors to external data or not.
)SymbolicShapeInferenceTload_external_dataF)
auto_mergeguess_output_ranksave_as_external_dataz4Failed to run symbolic shape inference on the model.N)	&onnxruntime.tools.symbolic_shape_inferr   onnx
load_modelinfer_shapesr   saver   warning)r   rW   r   modelouts   &&   r(   shape_inferencer   B  sO     NOOI$?E
 
-
-eX]
-
^C
s=UVMNr+   c                <    V ^8  d   QhR\         R\        R\        /# )r8   r   rW   r:   r   )r?   s   "r(   r@   r@   T  s'     K KC K4 K[_ Kr+   c                l   \         P                  ! V RR7      pVP                  P                  ^ ,          P                  p\        V4      pVP                  4       pW59   g   Q hWS,          pVP                  R8w  d   R# RpVP                  VP                  ^,          4      pVfD   VP                  VR^4      p	V	f   R# VP                  V	P                  ^ ,          4      pVf   R# RpVP                  \        P                  P                  8w  d   R# \        VP                   4      ^8w  d   R# VP                   ^,          p
V
^,          ^ 8X  d   R# \"        P$                  ! V
^,          4      ^,          pW,
          pVP&                  '       Ed	   V'       dw   \(        P*                  ! VP                   ^ ,          V3\(        P,                  R7      p\(        P.                  ! \0        P2                  ! V4      V3^R7      pWP                   ^&   Mt\(        P*                  ! WP                   ^,          3\(        P,                  R7      p\(        P.                  ! \0        P2                  ! V4      V3^ R7      pWP                   ^ &   VP5                  4       Vn        MR# \
        P6                  ! W VR7       R# )	zPad the logits MatMul weight in the provided decoder model, which will be overwritten.

Args:
    onnx_path (str): Path of onnx model
    use_external_data_format(bool): output tensors to external data or not.
Tr   MatMulF	Transposedtypeaxisr   )r   r   graphr   namer   output_name_to_nodeop_typeget_initializerinputmatch_parent	data_typer
   DataTyper|   r   dimsmathceilraw_datanpzerosfloat16concatenater   to_arraytobytesr   )r   rW   decoder_model_protologits_output_namedecoder_modelr   matmul_nodepad_along_axis_1logits_weighttranspose_before_matmulactual_vocab_sizepadded_vocab_sizepaddingpadding_dataweight_with_paddings   &&             r(   pad_weights_of_logits_matmulr   T  sC    //)M,2299!<AA12M';;=444%9Kh&
 !11+2C2CA2FGM"/"<"<[+WX"Y"*%556M6S6STU6VW   +"6"6">">> =!# &**1-A!#		"3a"781<3G 88]%7%7%:G$DBJJWL"$..+2F2F}2UWc1dkl"m$5q!88W.@.@.C$DBJJWL"$..+2F2F}2UWc1dkl"m$5q!!4!<!<!> NN&Iabr+   c                H    V ^8  d   QhR\         R\        R\        R\        /# )r8   
model_pathrl   rj   r:   )r<   r   r   )r?   s   "r(   r@   r@     s)      3  D Ue r+   c                V   \        4       p\        P                  Vn        V'       d   RR.MR.pV'       da   R\	        4       9  d   \        R4      h\        P                  R4       V'       d*   RR/pRV/pV Uu. uF  qwV9   d   WvV,          3MTNK  	  pp\        WVR7      pV# u upi )a`  Create OnnxRuntime session.

Args:
    model_path (str): onnx model path
    use_gpu (bool): use GPU or not
    use_sln_strict_mode (bool): use strict mode for skip layer normalization or not

Raises:
    RuntimeError: CUDAExecutionProvider is not available when --use_gpu is specified.

Returns:
    onnxruntime.InferenceSession: The created session.
CUDAExecutionProviderCPUExecutionProviderz5CUDAExecutionProvider is not available for --use_gpu!zuse CUDAExecutionProvider"enable_skip_layer_norm_strict_modeT)	providers)	r   r   ORT_DISABLE_ALLgraph_optimization_levelr   RuntimeErrorr   r   r   )	r   rl   rj   sess_optionsexecution_providerscuda_provider_optionsprovider_optionsr   ort_sessions	   &&&      r(   create_ort_sessionr     s     "#L,B,R,RL)OV24JK]s\t"*A*CCVWWKK34%I4$P! 79NOat#atY]:J2J-.PTTat   # #:GZ[K#s   :B&c                D    V ^8  d   QhR\         P                  R\        /# r8   r   r   r   r   r   )r?   s   "r(   r@   r@     s     5 5 5I 5r+   c           
     ^   V\         P                  P                  8H  p\        V P                  4      pV^,
          pV^8  g   Q h. RO\        V4       Uu. uF  pRV 2NK
  	  up,           p\        V P                  4      \        V4      8w  d.   \        R\        V4       R\        V P                  4       24      h\        V4       F  w  rWV P                  V,          P                  V8w  d0   \        RV RV RV P                  V,          P                   24      h\        P                  pV^8  d)   V'       d   \        P                  M\        P                  pV P                  V,          P                  P                  P                  p	W8w  g   K  \        RV RV RV	 24      h	  \        P!                  R4       R.\        V4       Uu. uF  pR	V 2NK
  	  up,           p
\        V P"                  4      \        V
4      8w  d.   \        R
\        V
4       R\        V P"                  4       24      h\        V
4       F  w  r[V P"                  V,          P                  V8w  d0   \        RV RV RV P"                  V,          P                   24      hV'       d   \        P                  M\        P                  pV P"                  V,          P                  P                  P                  pW8w  g   K  \        RV RV RV 24      h	  \        P!                  R4       R# u upi u upi )a  Verify GPT-2 subgraph

Args:
    graph (onnx.GraphProto): onnx graph of GPT-2
    precision (Precision): Precision (FLOAT16 or FLOAT32) of the model.

Raises:
    ValueError: Number of inputs not expected.
    ValueError: Input name is not expected.
    ValueError: Input data type is not expected.
    ValueError: Number of outputs not expected.
    ValueError: Output name is not expected.
    ValueError: Output data type is not expected.
past_ Number of inputs expected to be . Got Input  is expected to be $ is expected to have onnx data type z:Verifying GPT-2 graph inputs: name and data type are good.logitspresent_!Number of outputs expected to be Output z;Verifying GPT-2 graph outputs: name and data type are good.N)	input_idsposition_idsattention_mask)r   r|   r&   r   r   range
ValueError	enumerater   r
   INT32FLOATrE   tensor_type	elem_typer   r   r   )r   r   
is_float16input_countlayer_countiexpected_inputsexpected_inputexpected_type
input_typeexpected_outputsexpected_outputoutput_types   &&           r(   verify_gpt2_subgraphr    s    i//555Jekk"K/K!E^cdo^pHq^pYZ5QRPS^pHqqO
5;;3//;C<P;QQWX[\a\g\gXhWijkk&7;;q>.0vaS(;N;K6RWR]R]^_R`ReReQfghh#))63=K//;CTCTM[[^((44>>
&vaS(L]O[ablamnoo 8 KKLM zU;=O$P=Oxs^=O$PP
5<<C 011<SAQ=R<SSYZ]^c^j^jZkYlmnn'(89<<??2wqc)<_<MVTYT`T`abTcThThSijkk/9++{?P?Pll1o**66@@'vaS(L]O[abmanopp : KKMN A Ir" %Qs   L%L*c                D    V ^8  d   QhR\         P                  R\        /# r   r   )r?   s   "r(   r@   r@     s&     Io Iodoo Io) Ior+   c           
        V\         P                  P                  8H  pV'       d   \        P                  M\        P                  p\        V P                  4      pV^,
          ^,          pV^8  g   Q hRR.p\        V4       F+  pVP                  RV 24       VP                  RV 24       K-  	  \        V4       F+  pVP                  RV 24       VP                  RV 24       K-  	  \        V P                  4      \        V4      8w  d.   \        R\        V4       R\        V P                  4       24      h\        V4       F  w  rxV P                  V,          P                  V8w  d0   \        R	V R
V RV P                  V,          P                   24      hV^8  d   \        P                  MTp	V P                  V,          P                  P                  P                  p
W8w  g   K  \        R	V RV	 RV
 24      h	  R.p\        V4       F+  pVP                  RV 24       VP                  RV 24       K-  	  \        V P                   4      \        V4      8w  d.   \        R\        V4       R\        V P                   4       24      h\        V4       F  w  r|V P                   V,          P                  V8w  d0   \        RV R
V RV P                   V,          P                   24      hV P                   V,          P                  P                  P                  pW8w  g   K  \        RV RV RV 24      h	  R# )  Verify T5 decoder subgraph

Args:
    graph (onnx.GraphProto): onnx graph of T5 decoder
    precision (Precision): Precision (FLOAT16 or FLOAT32) of the model.

Raises:
    ValueError: Number of inputs not expected.
    ValueError: Input name is not expected.
    ValueError: Input data type is not expected.
    ValueError: Number of outputs not expected.
    ValueError: Output name is not expected.
    ValueError: Output data type is not expected.
r  encoder_attention_maskpast_key_self_past_value_self_past_key_cross_past_value_cross_r   r   r  r  r  r  present_key_self_present_value_self_r  r  N)r   r|   r&   r
   r  r   r   r  r   r  r  r   r  rE   r  r  r   )r   r   r  
float_typer  r  r  r  r  r  r  r  r  r  s   &&            r(   verify_t5_decoder_subgraphr)    s	    i//555J(2$$8I8IJekk"K?q(K! #$<=O;s34!1!56   ;45!21#67   5;;3//;C<P;QQWX[\a\g\gXhWijkk&7;;q>.0vaS(;N;K6RWR]R]^_R`ReReQfghh-.U))
[[^((44>>
&vaS(L]O[ablamnoo 8 !z;"3A3 78"5aS 9:   5<<C 011<SAQ=R<SSYZ]^c^j^jZkYlmnn'(89<<??2wqc)<_<MVTYT`T`abTcThThSijkkll1o**66@@$wqc)Mj\Y_`k_lmnn :r+   c                D    V ^8  d   QhR\         P                  R\        /# r   r   )r?   s   "r(   r@   r@   G  s&     Ya Ya4?? Yay Yar+   c           
     $   V\         P                  P                  8H  pRV P                  ^ ,          P                  9   p. ROpV'       d
   VR,          p\        V P                  4      \        V4      8w  d.   \        R\        V4       R\        V P                  4       24      h\        V4       F  w  rVV P                  V,          P                  V8w  d0   \        RV RV RV P                  V,          P                   24      h\        P                  pV P                  V,          P                  P                  P                  pW8w  g   K  \        RV RV RV 24      h	  V'       d   \        V P                  4      ^,          ^ 8X  g   Q h\        V P                  4      ^,          p	V	^8  g   Q h. p
\        V	4       F+  pV
P                  RV 24       V
P                  R	V 24       K-  	  M\         P#                  R
4       \        V P                  4      ^,
          ^,          ^ 8X  g   Q h\        V P                  4      ^,
          ^,          p	V	^8  g   Q hRR.p
\        V	4       F+  pV
P                  RV 24       V
P                  RV 24       K-  	  \        V	4       F+  pV
P                  RV 24       V
P                  R	V 24       K-  	  \        V P                  4      \        V
4      8w  d.   \        R\        V
4       R\        V P                  4       24      h\        V
4       F  w  r[V P                  V,          P                  V8w  d0   \        RV RV RV P                  V,          P                   24      hV'       d   \        P                  M\        P$                  pV P                  V,          P                  P                  P                  pW8w  g   K  \        RV RV RV 24      h	  \         P'                  R4       R# )r   cross:Nr8   Nr   r   r  r  r  present_key_cross_present_value_cross_zZThis format is deprecated. Please export T5 encoder in new format with only cross outputs.r  encoder_hidden_statesr&  r'  r  r  zMT5 encoder graph verified: name and data type of inputs and outputs are good.N)encoder_input_idsr!  decoder_input_ids)r   r|   r&   r   r   r   r   r  r  r
   r  rE   r  r  r  r   r   r   r  r   )r   r   r  
new_formatr  r  r  r  r  r  r  r  r  s   &&           r(   'verify_t5_encoder_decoder_init_subgraphr3  G  s    i//555JELLO000JO
 )"-
5;;3//;C<P;QQWX[\a\g\gXhWijkk&7;;q>.0vaS(;N;K6RWR]R]^_R`ReReQfghh#))[[^((44>>
&vaS(L]O[ablamnoo 8 5<< 1$)))%,,'1,a {#A##&8$<=##&:1#$>? $ 	stELL!A%*a///5<<(1,2a %&=>{#A##&7s$;<##&9!$=> $ {#A##&8$<=##&:1#$>? $ 5<<C 011<SAQ=R<SSYZ]^c^j^jZkYlmnn'(89<<??2wqc)<_<MVTYT`T`abTcThThSijkk/9++{?P?Pll1o**66@@'wqc)Mm_\bcnbopqq : KK_`r+   c                |    V ^8  d   QhR\         R\         R\        R\        R\        R,          R\        R,          /# )r8   graph1graph2shared_prefixmin_elementssignature_cache1Nsignature_cache2)r   r<   r}   dict)r?   s   "r(   r@   r@     sT     g! g!g!g! g! 	g!
 Tkg! Tkg!r+   c                0
   / p/ p. p. p	. p
V P                    EF  pVP                  '       d   \        VP                  4      V8  g   K2  VP                    F  pVP                  '       d   \        VP                  4      V8  g   K1  \        P                  ! WWE4      '       g   KP  W,P
                  ,           WkP
                  &   VP                  V4       VP
                  V9  dC   W,P
                  ,           pWVP
                  &   V	P                  V4       V
P                  V4        EK  	  EK  	  \        P                  RV
 24       V P                   F_  p\        \        VP                  4      4       F:  pVP                  V,          V
9   g   K  \        RVP                  V,           24      h	  Ka  	  VP                   F_  p\        \        VP                  4      4       F:  pVP                  V,          V
9   g   K  \        RVP                  V,           24      h	  Ka  	  V	 F  pVP                   P                  V4       K   	  VP                   F.  pVP
                  V9   g   K  VVP
                  ,          Vn        K0  	  VP                   F  p\        \        VP                  4      4       F  pVP                  V,          V9   g   K  W~P                  V,          ,          p\        P                  RVP
                   RV RVP                  V,           RV 24       VVP                  V&   K  	  K  	  V F  pV P                   P                  V4       K   	  V P                   F.  pVP
                  V9   g   K  VVP
                  ,          Vn        K0  	  V P                   F  p\        \        VP                  4      4       F  pVP                  V,          V9   g   K  WnP                  V,          ,          p\        P                  RVP
                   RV RVP                  V,           RV 24       VVP                  V&   K  	  K  	  V	 F  pVVP
                  ,          Vn        K  	  V	 F  p\         P"                  P%                  V4      P&                  p\         P(                  P+                  VP
                  VP,                  V4      pV P                  P                  V4       VP                  P                  V4       K  	  V	# )	ae  Remove initializers with same value from two graphs.

Args:
    graph1 (GraphProto): the first graph to process
    graph2 (GraphProto): the second graph to process
    shared_prefix (str): add prefix to the shared initializers among two graphs
    min_elements (int, optional): minimal number of elements for initializers to be considered. Defaults to 1024.
    signature_cache1 (dict): Optional dictionary to store data signatures of tensors in graph1 in order to speed up comparison
    signature_cache2 (dict): Optional dictionary to store data signatures of tensors in graph2 in order to speed up comparison
zshared initializers:zname is found in graph 1: zname is found in graph 2: zgraph 2 rename node z input z from z to zgraph 1 rename node )initializerr   sumr   has_same_valuer   r   r   r   noder  r   r   r   remove
value_infor   numpy_helperr   shapehelpermake_tensor_value_infor   )r5  r6  r7  r8  r9  r:  mapping_initializers_1mapping_initializers_2shared_initializers_1shared_initializers_2shared_initializers_namesinitializer1initializer2shared_namer@  jr=  rB  new_namerD  s   &&&&&&              r(   remove_shared_initializersrQ    s   &   "**!!!c,*;*;&<&L"..L %%%#l.?.?*@L*P''DTgg<IL]L]<]&'8'89%,,\:$$,BB"/2C2C"CK@K<+<+<=)00>-44[A /	 +& LL'(A'BCD s4::'Azz!} 99"%?

1#OPP (  s4::'Azz!} 99"%?

1#OPP (  -!!+. - ''
??444Z__EJO (
 s4::'Azz!} 661**Q-@3DII;gaStzzZ[}o]abjaklm (

1	 (  -!!+. - ''
??444Z__EJO (
 s4::'Azz!} 661**Q-@3DII;gaStzzZ[}o]abjaklm (

1	 (  -1+2B2BC - -!!**;7==[[778H8H+J_J_afg
  ,  , - ! r+   c                0    V ^8  d   QhR\         R\         /# )r8   encoder_modelr   r	   )r?   s   "r(   r@   r@     s      : j r+   c                 0   \        V 4      p\        V4      pVP                  R 4       VP                  R4       / / rTVP                  V4       VP                  V4       \        VP                  P
                  VP                  P
                  RVVR7      pV# )e_d_s_)r7  r9  r:  )r   add_prefix_to_namesremove_duplicated_initializerrQ  r   r   )rS  r   encoderdecoderr9  r:  initializerss   &&     r(   get_shared_initializersr^    s    &G&G%%)+R&))*:;))*:;-))L r+   c                R    V ^8  d   QhR\         R\        R\        \        ,          /# )r8   r   r8  r:   )r   r}   r;   r
   )r?   s   "r(   r@   r@     s+       
+r+   c                   . pV P                    FB  pVP                  '       d   \        VP                  4      V8  g   K1  VP                  V4       KD  	  V F  pV P                   P	                  V4       K   	  V F|  p\
        P                  P                  V4      P                  p\
        P                  P                  VP                  VP                  V4      pV P                  P                  V4       K~  	  V# )aF  Remove initializers of a graph, when they have number of elements larger than a threshold.

Args:
    graph (GraphProto): the graph.
    min_elements (int, optional): minimal number of elements for initializers to be considered. Defaults to 1024.

Returns:
    List[TensorProto]: initializers that are removed from the graph.
)r=  r   r>  r   rA  r   rC  r   rD  rE  rF  r   r   rB  )r   r8  moved_initializerstensorr=  rD  rB  s   &&     r(   move_initializersrc    s     ##FKK 0L @!!&) $
 *  - * *!!**;7==[[778H8H+J_J_afg

+ *
 r+   c                   V P                   ^ 8X  d   \        RV P                   R24      hV P                   ^8X  d   V P                  pEM5V P                   ^8X  d   V P                  pEMV P                   ^8X  d   V P
                  pMV P                   ^8X  d   V P                  pMV P                   ^8X  d   V P                  pMV P                   ^8X  d   V P                  pMV P                   ^8X  d   V P                  pMV P                   ^8X  d   V P                  pMbV P                   ^	8X  d   V P                  pMDV P                   ^
8X  d   V P                  pM&\        RV P                   RV P                    R24      hV P                  V3# )z
Convert attribute to kwarg format for use with onnx.helper.make_node.
    :parameter attribute: attribute in AttributeProto format.
    :return: attribute in {key: value} format.
z
attribute z does not have type specified.z has unsupported type rK   )rE   r  r   fr  stgfloatsintsstringstensorsgraphs)	attributer&   s   & r(   _attribute_to_pairro  >  s;    ~~:inn%55STUU ~~	1		1		1		1		1	  	1		1	!!	1	!!	2	  :inn%55KINNK[[\]^^NNE""r+   c                     / pV P                    F"  p\        V4      w  r4VP                  W4/4       K$  	  V P                  '       d   VP                  R V P                  /4       V# )domain)rn  ro  updaterq  )r@  kwargsattrkeyr&   s   &    r(   	kwargs_ofrv  c  sT    F)$/sl#  {{{x-.Mr+   c                     \        V P                  P                  P                  P                   Uu. uF,  qP
                  '       d   VP
                  MVP                  NK.  	  up4      # u upi r%   )tuplerE   r  rD  dim	dim_param	dim_value)vids   & r(   shape_ofr~  m  sJ    I\I\IbIbIfIfgIfA+++!++AKK?Ifghhgs   2A+c                $    V ^8  d   QhR\         /# r8   subgr   )r?   s   "r(   r@   r@   q  s     . .J .r+   c                 J   ^p^p. p\        V P                  4       F  w  rEWA8  dw   \        V4      p\        P                  P                  VP                  VP                  P                  P                  V^ ,          V^,          V^,          RV^,          .R7      pVP                  V.4       K  	  VP                  \        P                  P                  R\        P                  P                  ^.R7      .4       V P                  R4       V P                  P                  V4       . p\        V P                  4       F  w  rEWB8  dw   \        V4      p\        P                  P                  VP                  VP                  P                  P                  V^ ,          V^,          V^,          RV^,          .R7      pVP                  V.4       K  	  V P                  R4       V P                  P                  V4       . pV P                   F  p	T	p
V	P                   R8X  d   \#        V	4      pVP%                  R^/4       . pVP                  V	P                  4       \'        V4      ^8  d   VP                  R	.4       K$  \'        V4      ^8  d   VP                  R.4       \        P                  P(                  ! RWP                  3R
V	P                  /VB p
VP                  V
.4       K  	  V P                  R4       V P                  P                  V4       V # )   max_seq_lenr  rD  past_sequence_lengthrD  r   r   	Attentionra   r   r   r@  )r  r   r~  r   rE  rF  r   rE   r  r  r   r
   r  
ClearFieldr   r@  r   rv  rr  r   	make_node)r  input_past_0output_past_0
new_inputsr  r|  rD  new_outputs	new_nodesr@  new_noders  niss   &            r(   1update_decoder_subgraph_past_present_share_bufferr  q  s   LMJ4::&RLE33''--77Qxq58]E!HM 4 B
 	2$ ' t{{99:PRVRbRbRhRhqrps9tuvOOGJJj!K4;;'RLE33''--77Qxq58]E!HM 4 B
 	B4  ( 	OOHKK{#I		<<;&t_FMM6:;CJJtzz"c(Q,

B4 3x!|

234{{,,[#{{eQUQZQZe^deH($  	OOFIIYKr+   c                H    V ^8  d   QhR\         R\        R\        R\        /# )r8   r  is_beam_searchswitch_attentionr:   )r   r   )r?   s   "r(   r@   r@     s2     S S
S&*S>BS	Sr+   c                "   V'       d   . p\        V P                  4       F  w  rEVP                  V.4       K  	  VP                  \        P                  P                  R\        P                  P                  ^.R7      .4       VP                  \        P                  P                  R\        P                  P                  . ROR7      .4       V P                  R4       V P                  P                  V4       V'       Ed   . ROp. pV P                   EFE  pVP                  R8X  Ed   \        V4      p	V	P                  4        F8  p
V
R8X  d     R# W9  g   K  V
R8w  d   \        P                  R	V
 R
24       W K:  	  . pVP                  VP                  4       V'       di   \        V4      ^8  d   VP                  R.4       K$  \        V4      ^8  d   VP                  R.4       \        V4      ^	8  d   VP                  R.4       \        P                  P                   ! RVVP"                  3RVP$                  /V	B pVP                  V.4       EKH  	  V P                  R4       V P                  P                  V4       R# )a?  Update the Attention nodes to DecoderMaskedSelfAttention.

Args:
    subg (GraphProto): GraphProto of the decoder subgraph
    is_beam_search (bool): Boolean specifying if the sampling algo is BeamSearch
    switch_attention (bool): Boolean specifying if `Attention` is to be switched with `DecoderMaskedSelfAttention`

beam_widthr  cache_indirectionr   r  qkv_hidden_sizesFunidirectionalzRemoving attribute: zB from Attention node while switching to DecoderMaskedSelfAttentionr   DecoderMaskedSelfAttentionr   r@  T
batch_sizer  r  ra   	num_headsscalemask_filter_valuerq  )r  r   r   r   rE  rF  r
   r  r  r@  r   rv  copyr   r   r   r  r   r   )r  r  r  r  _ir|  'decoder_masked_attention_supported_attrr  r@  rs  kr  s   &&&         r(   4update_decoder_subgraph_use_decoder_masked_attentionr    s)    


+FBrd# , 	4;;==lDL\L\LbLbkljm=nop22'$$**E 3 	
 	 

*%3
/ 	IID||{*"4A ..$G  00"NN"6qc9{ | #I! '$ 

4::& "c(Q,

B4(3x!|

L>23x!|

$7#89{{,,0KK 	
  dV$Q R 			#r+   c                $    V ^8  d   QhR\         /# r  r  )r?   s   "r(   r@   r@     s     f3 f3* f3r+   c                
   \        4       p. p\        V P                  4       UUu/ uF  w  r4VP                  VbK  	  ppp/ p/ pV P                   Fe  pVP                   F1  p	V	'       g   K  W9  d   V.Wi&   K  Wi,          P                  V4       K3  	  VP                   F  p
V
'       g   K  WV
&   K  	  Kg  	  V P                   EF.  pVP                  R8X  g   K  VP                  ^,          '       d   VP                  ^ ,          '       g   KK  VP                  ^ ,          VP                  ^,          rRpRV9   d_   V P                   FM  pVP                  R8X  g   K  VP                  ^ ,          V8X  g   K0  VP                  ^ ,          P                  p M+	  M(V P                   F  pVP                  V8X  g   K  Tp M	  Vf   EK  \        P                  P                  V4      pVP                  ^8X  g   EK9  VP                  4       R
9   g   EKQ  VP                  ^ ,          V9   g   EKl  W{,          pVP                  R8X  d   VP                  ^ ,          '       g   EK  VP                  ^ ,          V9   d   VP                  ^ ,          P!                  R4      '       g)   VP                  ^ ,          P!                  R4      '       d   VP                  4       ^8X  dp   VP#                  VP                  ^ ,          4       VP                  V4       \%        VVP                  ^ ,          ,          4      ^8X  d   VP                  V4       EK  VP                  ^ ,          V9  d   EK  VVP                  ^ ,          ,          pVP                  R8X  d   VP                  ^ ,          '       g   EK  VVP                  ^ ,          ,          pVP                  R	8X  d   VP                  ^ ,          '       g   EK6  VP                  ^ ,          V9   g   EKQ  VP                  ^ ,          P!                  R4      '       g,   VP                  ^ ,          P!                  R4      '       g   EK  VP                  4       ^8X  g   EK  VP#                  VP                  ^ ,          4       VP'                  VVV.4       \%        VVP                  ^ ,          ,          4      ^8X  g   EK  VP                  V4       EK1  	  W3# u uppi )a^  Correct graph which originally use dim of past_seq_len from input_ids's shape which is fixed to max_seq_len after
   shared past/present buffer

Args:
    subg (GraphProto): GraphProto of the decoder subgraph
return:
    tensor_names_to_rename : set of tensor names which is equal to past_sequence_length
    nodes_to_remove : list of node to remove
GatherN	Constant_ConstantShaper"  r#  Reshaper   >      r8   )setr  r   r   r@  r   r   r   rn  rg  r=  r   rC  r   sizeitem
startswithaddr   r   )r  tensor_names_to_renamenodes_to_removeindexinpgraph_input_namesinput_name_to_nodesr   r@  
input_nameoutput_nameshape_tensor_nameshape_index_nameini_gather_indices
const_noderb  gather_indices_arr
shape_nodereshape_nodetranspose_nodes   &                   r(   find_past_seq_len_usager    s    !UO;DTZZ;PQ;PZU5;PQ		**Jz87;f'3'3::4@ %  ;;K{37K0 '  		 <<8#::a==

1 48::a=$**Q-/!%.."&))J!))Z7J<M<Ma<PTd<d-7-A-A!-D-F-F* #, #..F{{&66-3* / ")!%!2!2!;!;<N!O #''1,&++-7JJqM%880C
"**g5*:J:J1:M:M $$Q'+<<"((+667GHH%++A.99:LMM*//1Q6 +..t{{1~>#**40.z/@/@/CDEJ'..z: ##A&.AA2:3C3CA3FG$,,	9l>P>PQR>S>S!4\5G5G5J!K&..+=.BVBVWXBYBY #((+/@@&,,Q/::;KLL)//2==>PQQ*//1Q6 +..t{{1~>#**D*l+KL.~/D/DQ/GHIQN'..~> Q T "22s Rs   S;c                0    V ^8  d   QhR\         R\        /# r8   r   past_seq_len_namer   r<   )r?   s   "r(   r@   r@   a  s      	 c r+   c                 <   R p\        \        R V P                  P                  P                  4      4      pV Fp  p\        VP                  4      ^8  d   VP                  P                  R4       K7  VP                  P                  V4       VP                  P                  V4       Kr  	  V P                  P                  P                  P                  \        P                  P                  V\        P                  . ROR7      4       V P                  4        V # )r  c                      V P                   R 8H  # MultiHeadAttentionr   r@  s   &r(   <lambda>.add_cache_indirection_to_mha.<locals>.<lambda>d      9M)Mr+   r   r  r  r  max_sequence_length)r;   filterr   r   r@  r   r   r   r   rE  rF  r
   r  topological_sort)r   r  cache_indirection_name	mha_nodesr@  s   &&   r(   add_cache_indirection_to_mhar  a  s    0VMu{{O`O`OeOefgI $**o!JJb!

+,

01  
KK""**"K$5$5=p 	+ 	

 
Lr+   c                R    V ^8  d   QhR\         R\        R\        \        ,          /# )r8   r   r   skip_node_idxs)r   r}   r;   )r?   s   "r(   r@   r@   v  s&     1 1	 1# 14PS9 1r+   c                    R p. p\        \        R V P                  P                  P                  4      4      p\        V4       EF  w  rgWb9   d   K  ^ pVP                   F"  p	V	P                  R8X  g   K  V	P                  p M	  Tp
V
^ 8X  dX   V P                  P                  P                   F3  pVP                  VP                  ^,          8X  g   K'  VP                  p
 M	  RpV P                  P                  P                   Fb  pVP                  VP                  ^,          8X  g   K'  VP                  P                  P                  P                  ^,          P                   p M	  \#        VP$                  4      ^8  d   VP$                  P'                  R4       K7  V RV^,           2pVP$                  P'                  V4       VP'                  \(        P*                  P-                  VV
RVRV.R7      4       EK  	  V P                  P                  P$                  P/                  V4       V P1                  4        V # )	output_cross_qkc                      V P                   R 8H  # r  r  r  s   &r(   r  &add_output_qk_to_mha.<locals>.<lambda>z  r  r+   r  target_sequence_lengthr   _r  sequence_lengthr  )r;   r  r   r   r@  r  rn  r   r  r=  r   r   rE   r  rD  ry  r{  r   r   r   r   rE  rF  r   r  )r   r   r  output_qk_basename
output_qksr  idxr@  r  attoutput_qk_dtyper  r  output_qk_names   &&&           r(   add_output_qk_to_mhar  v  s   *JVMu{{O`O`OeOefgIy)	  	>>Cxx;&EE	 "  a[[&&2266TZZ]*&'kkO 3 ":""((AvvA&)*););)A)A)E)Ea)H)R)R& ) $++"KKr"./q
;>*KK..#Y0ACYZ / 	
E *T 
KK##J/	Lr+   c                $    V ^8  d   QhR\         /# )r8   r   r   )r?   s   "r(   r@   r@     s     f$ f$I f$r+   c                   aa R pRpRp\        \        R V P                  P                  P                  4      4      ^ ,          pV P                  V. RO. RO4      pV P                  VRR.^ ^.4      pVe   TpMVe   TpM\        P                  R	4       R# VR,          pVP                  R8X  Ed   VR,          p	V P                  V	RR
.^ ^ .4      oSf   \        P                  R4       R# V P                  V	. RO. RO4      p
V
f   \        P                  R4       R# V
^ ,          pSV
R,          8w  d   \        P                  R4       R# \        \        V3R lV P                  P                  P                  4      4      ^ ,          pV P                  P                  P                  P                  V4       V P                  P                  P                  P                  S^ ,          4       V P                  P                  P                  P                  S^,          4       W9P                  ^ &   W;P                  ^ &   EMpV P                  V. R O. R!O4      pVf   \        P                  R4       R# V^,          pV P                  V. R"O. R#O4      oSf   \        P                  R4       R# S^ ,          pVR,          SR,          8w  d   \        P                  R4       R# \        \        V3R lV P                  P                  P                  4      4      ^ ,          pV P                  P                  P                  P                  V4       \        \        V3R lV P                  P                  P                  4      4      ^ ,          pV P                  P                  P                  P                  V4       V P                  P                  P                  P                  S^,          4       V P                  P                  P                  P                  S^,          4       V P                  P                  P                  P                  S^,          4       V P                  P                  P                  P                  S^,          4       W>P                  ^ &   W;P                  ^ &   V P                  P                  P                  P                  \        P                  P                  V\        P                   ^.R7      4       \        P                  P#                  RV.V.V P%                  R4      R7      p\        P                  P                  V\        P                   . R7      p\        P                  P#                  RV.V.V P%                  R4      \        P&                  R7      p\        P                  P                  V\        P&                  . R7      pV P                  P                  P                  P)                  VV.4       V P                  P                  P*                  P)                  VV.4       V P-                  4        W3# )$r  past_seq_len_int32past_seq_len_int64c                      V P                   R 8H  # )LayerNormalizationr  )ns   &r(   r  *fix_past_sequence_length.<locals>.<lambda>  s    .B!Br+   Addr  RangeSliceNzBCannot identify base path for fixing past_sequence_length subgraphr  zDCannot identify gather path for fixing past_sequence_length subgraphzACannot identify add path for fixing past_sequence_length subgraph:r  NNz]Gather path and add path do not share the same nodes for calculating the past_sequence_lengthc                 `   < V P                   ^ ,          S^ ,          P                  ^,          8H  #     r   r   )r  gather_paths   &r(   r  r    s"    188A;+a.BVBVWXBY3Yr+   zGCannot identify input_ids path for fixing past_sequence_length subgraphzFCannot identify past_key path for fixing past_sequence_length subgraph:r8   NNziThe input_ids path and past_key path do not share the same nodes for calculating the past_sequence_lengthc                 `   < V P                   ^ ,          S^,          P                  ^,          8H  # r  r  r  past_key_paths   &r(   r  r  '  s$    188A;-PQBRBXBXYZB[3[r+   c                 `   < V P                   ^ ,          SR,          P                  ^,          8H  # )r  r  r  s   &r(   r  r  )  s$    AHHQK=QSCTCZCZ[\C]4]r+   r  Squeezeinputsoutputsr   Castr  r  r   to)r  r  TileExpand	Unsqueezer  )r  r  r  r  r  r  rs   )r  r  r  r  r  r  )r  r  r  r  r  r   )r8   r  r  r  r  r  )r  r  r  r  r   )r  r  r  r  r  )r;   r  r   r   r@  match_parent_pathr   r   r   rA  r   r   r   rE  rF  r
   r  r  create_node_nameINT64r   rB  r  )r   r  r  r  r@  base_path_hfbase_path_oai	base_path	base_node
range_nodeadd_pathadd_nodeconstant_in_gatherinput_ids_pathunsqueeze_nodeconstant_in_reshapesqueeze_nodesqueeze_output	cast_nodecast_outputr  r  s   &                   @@r(   fix_past_sequence_lengthr    sF   D /--BEKKDUDUDZDZ[\]^_D**AL
 ++		
AM
  			"!	XY"IG#r]
--wF

 KK^_**&

 KK[\A;(2,&KKwx "&)Y[`[f[f[l[l[q[q"rstuv%%&89%%k!n5%%k!n5 1.q 00K

 !KKab!!$//D

  KK`a&q)"r!22KK{  "&)[]b]h]h]n]n]s]s"tuvwx%%&89"6*]_d_j_j_p_p_u_u#vw
 	%%&9:%%mA&67%%mA&67%%mA&67%%mA&67 #5Q.q 
KK""**+<k>O>OXYWZ*[
 ;;((!"#$##I.	 ) L [[778JKL]L]eg7hN%%"##$##F+ & I ++445GIZIZbd4eK 
KK!!<";<	KK  ''(EF	##r+   c                0    V ^8  d   QhR\         R\        /# r  r  )r?   s   "r(   r@   r@   S  s     D D) D Dr+   c                 4   R pRpV P                   P                  P                  P                  \        P
                  P                  V\        P                  ^.R7      \        P
                  P                  V\        P                  . ROR7      .4       \        \        R V P                   P                  P                  4      4      p\        V4       EF  w  rV^ pVP                   F"  pVP                  R8X  g   K  VP                  p M	  RV^,           2p	\        P
                  P                  V	\        P                   RV^R.R7      p
V^,          ^8X  d0   V P                   P                  P"                  P%                  V
4       \        P
                  P'                  RVP                  ^ ,          VP                  ^,          VP                  ^,          R	R	\)        VP                  4      ^8  d   VP                  ^,          MR	\)        VP                  4      ^8  d   VP                  ^,          MR	VVVVP                  ^,          .VP"                  ^ ,          \)        VP                  4      ^8  d   VP"                  ^,          MR	\)        VP                  4      ^8  d   VP"                  ^,          MR	V^,          ^8X  d   T	MR	.VP                  P+                  R
R4      RVV^,          ^R7      pV^,          ^ 8X  d   VP"                  P-                  R	4       V P                   P                  P                  P-                  V4       V P                   P                  P                  P                  V.4       EK  	  V P/                  4        V # )r  r  r  r  c                      V P                   R 8H  # r  r  r  s   &r(   r  (replace_mha_with_dmmha.<locals>.<lambda>b  r  r+   r  output_cross_qk_zencode_sequence_length / 2DecoderMaskedMultiHeadAttentionr   r  com.microsoft)r  r  r   rq  r  	output_qkra   r  )r   r   r   r   r   rE  rF  r
   r  r;   r  r@  r  rn  r   r  r  r   r   r  r   replacerA  r  )r   r  r  r  r  r  r@  r  r  qk_output_name	qk_output
dmmha_nodes   &&          r(   replace_mha_with_dmmhar)  S  s   J+	KK""KK..z;;L;LUVTW.XKK..!;#4#4<o / 	
 VMu{{O`O`OeOefgIy)		>>Cxx;&EE	 " ,C1H:6KK66K--lIqRn5o 7 
	 7a<KK$$++I6 [[**-

1

1

1!$TZZ1!4

1"!$TZZ1!4

1"!!

1 A"%djj/A"5A2"%djj/A"5A2"%'Q,B	 ""#79Z["Qw&'3 + 

6 7a<$$R(%%d+%%zl3c *f 
Lr+   c          
      T    V ^8  d   QhR\         R\        R\        R\        R\        /# )r8   r   	attn_maskkv_num_heads
world_sizewindow_size)r   r<   r}   )r?   s   "r(   r@   r@     sA     k kkk k 	k
 kr+   c                 n   V P                  \        P                  P                  R \        P
                  ^.^.R7      4       \        P                  P                  RVR .VR,           .V P                  R4      R7      p\        P                  P                  RVR,           R .R.V P                  R4      R7      p\        P                  P                  RR.R.V P                  R4      \        P                  R	7      p\        P                  P                  R
V.VR,           .V P                  R
4      R7      p\        P                  P                  RVR,           R .R.V P                  R4      ^ R7      p	\        P                  P                  RR.R.V P                  R4      \        P                  R	7      p
V P                  P                  P                  P                  VVVVV	V
.4       \        \        R V P                  P                  P                  4      4      p\        V4       EF  w  rV P!                  V. R"O. R#O4      pV P!                  VRR.^ ^ .4      pRRRpppVe   Vw  pppM	Ve   Vw  ppV P!                  V. R"O. R$O4      pV P!                  VRR.^^ .4      pRRRpppVe   Vw  pppM	Ve   Vw  ppV P!                  VRR.^^ .4      pV P!                  VR.^.4      pRRppVe   Vw  ppMVe
   V^ ,          p^ pVe7   Ve3   VP"                   F"  pVP$                  R8X  g   K  VP&                  pK$  	  ^ pVP"                   F"  pVP$                  R8X  g   K  VP&                  pK$  	  VP(                  ^ ,          VP(                  ^ ,          8H  ;'       d(    VP(                  ^ ,          VP(                  ^ ,          8H  pVRJ;'       d    VRJ;'       d    VRJp VRJ ;'       d    VRJ ;'       d    VRJ p!RRRp$p#p"V'       Ed   V '       g
   V!'       Ed   \*        P,                  ! V P/                  VP(                  ^,          4      4      p%\*        P,                  ! V P/                  VP(                  ^,          4      4      p&\*        P,                  ! V P/                  VP(                  ^,          4      4      p'V%P0                  R%,          p(\2        P4                  ! V%V&V'3^R7      P7                  V(^V(,          4      p)\        P8                  P;                  V)RV 2R7      p)V P                  V)4       \        P                  P                  RVP(                  ^ ,          V)P$                  .V)P$                   R2.V P                  R4      R7      p*V P                  P                  P                  P                  V*.4       V P                  P                  P                  P=                  V4       V P                  P                  P                  P=                  V4       V P                  P                  P                  P=                  V4       V*P>                  ^ ,          p"V '       Ed9   \*        P,                  ! V P/                  VP(                  ^,          4      4      p+\*        P,                  ! V P/                  VP(                  ^,          4      4      p,\*        P,                  ! V P/                  VP(                  ^,          4      4      p-V+P0                  R%,          p(\2        P4                  ! V+V,V-3^ R7      P7                  ^V(,          4      p.\        P8                  P;                  V.RV 2R7      p.V P                  V.4       \        P                  P                  RV*P>                  ^ ,          V.P$                  .V.P$                   R2.R7      p/V P                  P                  P                  P                  V/.4       V P                  P                  P                  P=                  V4       V P                  P                  P                  P=                  V4       V P                  P                  P                  P=                  V4       V/P>                  ^ ,          p"M9VP>                  ^ ,          p"VP>                  ^ ,          p#VP>                  ^ ,          p$\        P                  P                  RT"T#T$VP(                  ^,          VP(                  ^,          VP>                  ^ ,          V
P>                  ^ ,          Ve   VP(                  ^,          MRVe   VP(                  ^,          MR.	VP>                  VP$                  PA                  RR4      R VV,          V^ 8X  d
   VV,          MW#,          T\C        VRJ;'       d    VRJ4      VR!7
      p0V P                  P                  P                  P=                  V4       V P                  P                  P                  P                  V0.4       Ve0   V P                  P                  P                  P=                  V4       Vf   EKT  V P                  P                  P                  P=                  V4       EK  	  V # )&oner   r   r   vals	ReduceSum	_row_sumsr   Subseqlens_k_int64r  	seqlens_kr  r  _shaper  total_seq_len_int64)r  r  r   r   total_seq_lenc                      V P                   R 8H  # r  r  r  s   &r(   r  &replace_mha_with_gqa.<locals>.<lambda>   r  r+   RotaryEmbeddingr  r   Ninterleavedr  r   r   QKV_Weight_r   _output	QKV_Bias_)r  r  GroupQueryAttentionr  r#  )	r  r  r   rq  r  r,  local_window_size	do_rotaryrotary_interleaved)r=  r  r   )r  r  r  r	  rs   )"add_initializerr   rE  make_tensorr
   r  r  r  r  r   r   r@  r   r;   r  r  r
  rn  r   r  r   r   r   r   rD  r   stackreshaperC  
from_arrayrA  r   r%  r}   )1r   r+  r,  r-  r.  reduce_sum_nodesub_nodeseqlen_k_cast_noder  gather_nodetotal_seqlen_cast_noder  r  r@  q_path_1q_path_2q_rotaryq_addq_matmulk_path_1k_path_2k_rotaryk_addk_matmulv_path_1v_path_2v_addv_matmulr>  r  r  root_input_is_sameall_paths_have_biasall_paths_have_no_biasq_input_to_attentionk_input_to_attentionv_input_to_attentionqwkwvwry  
qkv_weightpacked_matmul_nodeqbkbvbqkv_biaspacked_add_nodegqa_nodes1   &&&&&                                            r(   replace_mha_with_gqarp    sf	   & 
!''	 	  	
 kk++5![()##K0	 , O {{$$K'/"###E*	 % H ..!"##F+ /  &&{X%&##G,	 ' J ++''H$e,&'##H- ( K "[[22%& !##F+ 3  
KK!!"	
	H VMu{{O`O`OeOefgIy)	**41UW`a**42CX1NQRTUPVW$($%(0%HeX!!)Hh **41UW`a**42CX1NQRTUPVW$($%(0%HeX!!)Hh **4%1BQFK**4(aSAx&OE8!{H H$8))88},"%%%K *
 	>>Cxx;&EE	 "
 &^^A.(..2CCnnWXHY]e]k]klm]nHn $4/[[E4E[[%W[J[!&$!R!R5D=!R!RUd] LNrSU4H2#6:P:P%%e&;&;HNN1<M&NOB%%e&;&;HNN1<M&NOB%%e&;&;HNN1<M&NOB((2,C2r2,Q7??QWMJ**55jUXTYGZ5[J!!*-!%!6!6 q):??;&OO,G45++H5	 "7 " KK""))+=*>?KK""))(3KK""))(3KK""))(3#5#<#<Q#?  #" ))%*?*?A*OP ))%*?*?A*OP ))%*?*?A*OPhhrl88RRLq9AA!c'J,,77SVRWGX7Y%%h/"&++"7"7.55a8(--H (g67 #8 #
 !!&&--.?@!!&&--e4!!&&--e4!!&&--e4'6'='=a'@$ $,??1#5 #+??1#5 #+??1#5  ;;((!$$$

1

1"))!,&--a0&.&:"&.&:"
 KK""#79NO":-5AQ5F)z1LLf)($.GG843GH*) ) 
, 	%%d+%%xj1KK""))(3KK""))(3E *H Lr+   c                $    V ^8  d   QhR\         /# r  r  )r?   s   "r(   r@   r@     s     &f &f &fr+   c           	      X   ^pV P                    Uu. uF  q"P                  NK  	  ppV^8  d)   W1,          P                  R4      '       g   V^,          pK/  ^p\        V P                  4      V,
          ^,          p^V,          V,           p\        V4       Uu/ uF.  qpP                   V^,          V,           ,          P                  VbK0  	  pp\        RV 24       \        V P                   V,          4      p	\        RV	 24       V	^ ,          p
V	^,          pV	^,          p^ pV P                   EFU  pVP                  R8X  g   K  VP                   ^,          V9   g   K1  \        RVP                   RVP                   24       V^,          pWP                   ^,          ,          pRV 2pR.^\        VP                  4      ,
          ,          pVP                  V4       VP                  P                  V4       VP                  P                  \        P                  P                  R	^4      .4       \        P                  P!                  V\"        P$                  W^V.4      pV P                  P                  V.4       EKX  	  W8w  d   \'        R
V RV 24      hR# u upi u upi )r  pastz    -- past_key_cross_inputs = zpast_key_cross_0_shape is r"  z'    -- add cross QK output from: node: z with output: r!  r   r$  z#Did not add cross QK for all layersz vs N)r   r   r  r   r   r  printr~  r@  r   r   r   rn  r   rE  make_attributerF  r
   r  r  )r  input_self_past_0gir  output_self_present_0
num_layersinput_cross_past_0layerpast_key_cross_inputsinput_past_key_cross_0_shapebatch_size_dimnum_heads_dimcross_seq_len_dimnum_layer_output_qkr@  cross_attention_out_nameappended_namescross_attentions   &                 r(   .update_decoder_subgraph_output_cross_attentionr    s^   +/::6:R:6
a
(9(L(W(WX^(_(_Qdkk"%::q@JZ*;;afgqarsarX]ZZ	4F(FGLLeSars	+,A+B
CD#+DJJ7I,J#K 	&'C&D
EF1!4N03M4Q7		LL==DJJqMUjDj;DII;nUYU`U`Tabc1$)**Q-8E)9%'A$ TQT[[)9%9:N!!":;KK~.NN!!4;;#=#=k1#M"NO"kk@@(!!3DEO
 KK01! " (>zl$ObNcdee )E 7 ts   J"4J'c                $    V ^8  d   QhR\         /# r  rT  )r?   s   "r(   r@   r@     s     \ \* \r+   c           
      B   ^pV P                    Uu. uF  q"P                  NK  	  ppV^8  d)   W1,          P                  R4      '       g   V^,          pK/  ^p\        \	        V P                   4      V,
          ^,          4      p^V,          V,           p. p. pV P
                   F(  p	V	P                  R8X  g   K  VP                  V	.4       K*  	  \	        V4      V8  d   R# Rp
V P
                   F  p	V	P                  R8X  g   K  T	p
 M	  . R#OpRp\        V 4      w  r\	        V4      ^ 8  d   V F  p\        RV R	V R
24       K  	  V F(  p\        RVP                   RVP                   24       K*  	  \        P                  P                  RR.R.RR7      p\        P                  P                  RR.V.R\        P                  R7      pVP                  VV.4       V P
                   EF)  p	\	        V	P                  4      ^ 8  d   V
e   V	P                  ^ ,          V
P                   ^,          8X  dg   \        P                  P                  RR.R.R\        P                  R7      pVP                  ^ ,          V	P                   ^&   VP                  V.4       V	P                  R8X  Ed   \!        V	4      pVP#                  4        F  pVV9  g   K  VV K  	  V	P                   ^ ,          V	P                   ^,          V	P                   ^,          .pTP                  \	        V	P                   4      ^8  d   V	P                   ^,          MR.4       TP                  \	        V	P                   4      ^8  d   V	P                   ^,          MR.4       TP                  \	        V	P                   4      ^8  d   V	P                   ^,          MR.4       TP                  \	        V	P                   4      ^8  d   V	P                   ^,          MR.4       VP                  R.4       VP                  R.4       VP                  R.4       TP                  \	        V	P                   4      ^8  d   V	P                   ^,          MR.4       ^VR&   \        P                  P                  ! RVV	P                  3RV	P                  /VB p	W9  g   EK  \%        V	P                   4       F  w  ppVV9   g   K  WP                   V&   K  	  VP                  V	.4       EK,  	  V P'                  R4       V P
                  P                  V4       V P                    Uu. uF  pVP                  NK  	  pp. p\%        V P                   4       F  w  ppVV8  dv   VV8  do   \)        V4      p\        P                  P+                  VP                  VP,                  P.                  P0                  V^ ,          V^,          RV^,          .R7      pVP                  V.4       K  	  RV9  dL   VP                  \        P                  P+                  R\        P                  P2                  ^.R7      .4       RV9  dL   VP                  \        P                  P+                  R\        P                  P2                  ^.R7      .4       RV9  dM   VP                  \        P                  P+                  R\        P                  P2                  . R$OR7      .4       V P'                  R 4       V P                   P                  V4       . p\%        V P                  4       F  w  ppVV8  do   \)        V4      p\        P                  P+                  VP                  VP,                  P.                  P0                  V^ ,          V^,          RV^,          .R7      pVP                  V.4       K  	  V P'                  R!4       V P                  P                  V4       R"# u upi u upi )%r  rs  r  FNRelativePositionBiasra   #past_sequence_length_squeezed_int64zFound tensor name `z` to be renamed to ``zFound node to remove: type = z	, name = r  r  past_sequence_length_squeezed!node_past_sequence_length_squeezer@  r  &node_past_sequence_length_squeeze_cast)r   r  past_sequence_length_int64past_sequence_length_castr   r  r  r"  r   r@  r  r  r  r   r   Tr  r  )r   r   r  r}   r   r@  r   r   r  rt  r   rE  r  r
   r  r   rv  r  r  r  r~  rF  rE   r  r  r  )r  rv  rw  r  output_self_past_0ry  rz  r  	old_nodesr@  rel_pos_bias_noder  target_squeezed_past_seq_namer  r  name_to_renamenrr  r  rs  r  r  r  r   r  orig_input_namesr  r  r|  rD  r  s   &                              r(   ?update_decoder_subgraph_share_buffer_and_use_decoder_masked_mhar    s   +/::6:R:6
a
(9(L(W(WX^(_(_Qc$**o(99Q>?JZ*;;II		<<//dV$ 
 9~
" 		<<11 $ 
/+ %J!.Ed.K+
!"Q&4N''77KLiKjjklm 5!B1"**YrwwiPQ " {{,,#$,-4	 - 
 KK)),-*+9   * 
	 	,	23		t{{a$5$AdkkRSnXiXoXopqXrFr--'(-.0$$ . I &,,Q/DJJqMi[)<<//t_F[[]CCq	 # 

1

1

1C JJTZZ1)<

1"EFJJTZZ1)<

1"EFJJTZZ1)<

1"EFJJTZZ1)<

1"EFJJ./0JJ~&JJ+,-JJTZZ1)<

1"EF23F./;;((1 YY	
 D &(4t11(EJJu%  5 dV$a d 	OOFIIY,0JJ7JSJ7J4::&2!!a*<&<RLE33''--77Qxq=%(C 4 B
 	2$ ' %55[[//0FHXHXH^H^ghfi/jk	
 ++4;;==lDL\L\LbLbkljm=nop"2222'$$**E 3 	
 	OOGJJj!K4;;'2""RLE33''--77Qxq=%(C 4 B
 	B4  ( 	OOHKK{#s 7Z 8s   ``c                $    V ^8  d   QhR\         /# )r8   model_protorT  )r?   s   "r(   r@   r@   P  s     8 8 8r+   c                    \        V 4      pVP                  4       p. p. pVP                  4        EF  pVP                  R 8X  g   K  RVP                  ^,          9   d   RVP                  ^,          9   d   KI  W%P                  ^ ,          ,          pW%P                  ^,          ,          pW%P                  ^,          ,          pVP                  VP                  ^,          4      p	VP                  VP                  ^,          4      p
VP                  VP                  ^,          4      pV	'       d   V
'       d	   V'       g    R# \        P                  ! V	4      p\        P                  ! V
4      p\        P                  ! V4      p\        P                  ! WV.^R7      pVP                  RRR7      p\        P                  P                  VR,           V	P                  ^8X  d   \        P                   M\        P"                  VP$                  ^ ,          VP$                  ^,          .VP'                  4       P)                  4       R	7      pV P*                  P,                  P/                  V.4       \        P                  P1                  RVP                  ^ ,          VR,           .VR
,           .VR7      pVP2                  ^ ,          VP                  ^ &   RVP                  ^&   RVP                  ^&   VP/                  V.4       VP/                  WgV.4       EK  	  VP5                  V4       VP7                  V4       VP9                  4        VP;                  4        R# )r"  past_key_crosspast_value_crossFr   r   
MatMul_QKV)name_prefix_weightr1  _outr   r   T)r   r   nodesr   r   r   r   r   r   r   r  r   rE  rH  r   r
   r  r|   rD  flattentolistr   r=  r   r  r   	add_nodesremove_nodesupdate_graphr  )r  
onnx_modelr   nodes_to_addr  r@  rU  rZ  r^  q_weightk_weightv_weightre  rf  rg  rh  matmul_node_nameweightr   s   &                  r(   pack_qkv_for_decoder_masked_mhar  P  s   ;'J$88:LO  "<<<<4::a=05G4::VW=5X*::a=9H*::a=9H*::a=9H!11(..2CDH!11(..2CDH!11(..2CDHh%%h/B%%h/B%%h/B1=J)::8Q]:^[[,,%	1080B0Ba0G;,,[M`M` &&q):+;+;A+>?'')002	 - F ))00&:++// q)+;i+GH)F23%	 0 K (..q1DJJqMDJJqMDJJqM.""H#ABU #X &O,!r+   c                0    V ^8  d   QhR\         R\        /# )r8   decoder_onnx_pathrW   r   )r?   s   "r(   r@   r@     s      # ae r+   c                n   \         P                  ! V RR7      p\        \        VP                  P
                  4      4       F  pVP                  P
                  V,          P                  R8X  g/   VP                  P
                  V,          P                  R8X  g   K]  VP                  P
                  V,          P                  P                  P                  P                  ^,          pVP                  R4      '       d   VP                  4        ^Vn        K  	  \        P                  ! VV VR7       R# )a=  Update the input shapes for the inputs "input_ids" and "position_ids" and make the sequence length dim value 1 for each of them.
   The decoder model will be over-written.

Args:
    decoder_onnx_path (str): Path of GPT-2 decoder onnx model
    use_external_data_format(bool): output tensors to external data or not.
Tr   r  r	  rz  r   )r   r   r  r   r   r   r   rE   r  rD  ry  HasFieldClearr{  r   r   )r  rW   r   r  shape_dim_protos   &&   r(   *update_input_shapes_for_gpt2_decoder_modelr    s     //*;PTU3*006678%%++A.33{B"((..q166.H177==a@EEQQWW[[\]^O ''44%%' )*O% 9 NN6
 r+   c                H    V ^8  d   QhR\         R\         R\        R\        /# )r8   r  init_decoder_onnx_pathrW   r:   r   )r?   s   "r(   r@   r@     s6     h hhh #h 
	hr+   c           	     	   \         P                  ! V RR7      pVP                  P                  ^ ,          P                  p\        V4      pVP                  4       pWF9   g   Q hWd,          pVP                  R8w  d   R# VP                  V. RO. RO4      pVf   VP                  V. RO. RO4      pVf3   VP                  V. RO. RO4      pVf   VP                  V. RO. RO4      pVf   R# VR,          p	V	P                  R8H  p
V
'       gx   ^ pVP                  V	. ROV^ ^ ^ .4      pVf   ^pVP                  V	. ROV^ ^ ^ .4      pVf   ^ pVP                  V	. ROV^ ^ .4      pVf   ^pVP                  V	. ROV^ ^ .4      pMr^ pVP                  V	. ROV^ ^ .4      pVf   ^pVP                  V	. ROV^ ^ .4      pVf   ^ pVP                  V	RR.V^ .4      pVf   ^pVP                  V	RR.V^ .4      pVf   R# V^8X  d   ^ M^pV
'       g   VP                  V	RV4      pMVP                  V	RV4      pVf   R# VR,          pVR,          p\         P                  P                  R\        P                  ^.R.R	7      p\         P                  P                  R
\        P                  ^.R.R	7      p\         P                  P                  R\        P                  ^.^.R	7      p\         P                  P                  R\        P                  ^.R.R	7      pVP                  V4       VP                  V4       VP                  V4       VP                  V4       RVP                  ^ ,          ,           p\         P                  P                  RVP                  ^ ,          RR
RR.V.VP!                  RR4      R7      pV
'       g   VP                  ^ ,          MVP                  ^,          pRVP                  ^ ,          ,           p\         P                  P                  RVRR
RR.V.VP!                  RR4      R7      pVP#                  V4       VP#                  V4       VP%                  VVP                  ^ ,          V4       VP%                  V	VV4       VP'                  4        \
        P(                  ! VVVR7       R# ) al  Generates the initial decoder GPT2 subgraph and saves it for downstream use.
   The initial decoder model will be saved to init_decoder_onnx_path.

Args:
    decoder_onnx_path (str): Path of GPT-2 decoder onnx model
    init_decoder_onnx_path (str): Path of GPT-2 init decoder onnx model
    use_external_data_format(bool): output tensors to external data or not.
Tr   r   Fr  SkipLayerNormalizationr  SliceLastTokenStartsr1  SliceLastTokenEndsSliceLastTokenAxesSliceLastTokenStepsedge_modified_r  GatherLastToken_0_r   GatherLastToken_1_r   )r  r  r  r  r  r   r  FastGelur  r   r  r  r  )r  r  r  r  r  r  r  r  r  r  r  r  r  )
r  r  r  r   r  r  r  r   r  r  )
r  r  r  r  r  r  r  r  r  r  )r  r  r  r   r  r   r  r  )r  r  r  r  r  r  r  r  )r  r   r  r   r  )r  r  r  r  r  rs   )r  r  r   r  )r  r   r  )r  r   r  r  )r   r   r   r   r   r   r   r   r
  r   rE  rH  r
   r  rG  r  r  r  replace_node_inputr  r   )r  r  rW   init_decoder_model_protor   gpt2_init_decoder_modelr   logits_matmul_node"logits_matmul_to_residual_add_pathresidual_add_nodeis_skiplayernorm_path&residual_add_to_attention_parent_indexresidual_add_to_attention_path residual_add_to_add_parent_indexadd_before_residual_add	attentionmatmul_after_attentionslice_starts
slice_ends
slice_axesslice_stepsslice_0_output_nameslice_node_0add_before_residual_add_outputslice_1_output_nameslice_node_1s   &&&                       r(   generate_gpt2_init_decoderr    s     $/@UYZ177>>qAFF'(@A1EEG444,@ !!X- *A)R)R	
 	0#*&* *1-D-V-V +.
*$ *1-D-V-V	 %.
*  .51H1Z1Z"  
2. *1:2> .559QQ !12.)@)R)R23Q1=*
& *1562-D-V-V!67AqA.* *1562-D-V-V!.7A>.* *1562-D-V-V!.7A>.* 23.)@)R)R+3Q:*
& *1562-D-V-V!/7A>.* *1562-D-V-V!;'7;.* *1562-D-V-V!;'7;.* &-,RVW,Wq]^$ !"9"F"Fu&F#
 #:"F"F$,#
 &.r2I;B?;;**###ST	 + L ((!##ST	 ) J ((!##SS	 ) J ++))"##ST	 * K ++L9++J7++J7++K8 +Y-=-=a-@@;;((Q"  !
 %%$55g?ST ) L" 2G&&q)LcLjLjklLm # +-D-K-KA-NN;;((*"  !
 %%$55g?ST ) L $$\2$$\2 ../EyGWGWXYGZ\op../@B`buv ,,. NN 6
 r+   c                >   \        ^4      p\        VP                  4      p\        VP                  4      p\        VP                  4      pV P                  P
                   F  pVP                  P                  P                  P                   F^  pVP                  R4      '       g   K  VP                  VVVV39   g   K3  \        VP                  4      pVP                  4        Wn        K`  	  K  	  V P                  P                   F  pVP                  P                  P                  P                   F^  pVP                  R4      '       g   K  VP                  VVVV39   g   K3  \        VP                  4      pVP                  4        Wn        K`  	  K  	  R# )z_Make dim_proto numeric.

Args:
    model: T5 encoder and decoder model.
    config: T5 config.
rz  N)r<   r  d_modeld_kvr   r   rE   r  rD  ry  r  rz  r}   r  r{  r   )	r   configr  r  hidden_size	head_sizerb  	dim_protor{  s	   &&       r(   make_dim_proto_numeric_t5r  	  sG    !fOF$$%Ifnn%KFKK I++$$0066::I!!+..93F3F	K 4  	 3 34	!&/# ; % ++##0066::I!!+..93F3F	K 4  	 3 34	!&/# ; $r+   c                D    V ^8  d   QhR\         P                  R\        /# )r8   r   generation_type)r=   r>   r    )r?   s   "r(   r@   r@   	  s&     y0 y0


y0#y0r+   c                0   V P                   R8H  pV\        P                  8H  pV\        P                  8H  pV\        P                  8H  pV P
                  p\        P                  RV 24       \        V P                  4      ^8X  d   V P                  ^ ,          R8X  dz   V'       dk   V P                  \        P                  P                  8X  dB   . RoOV n	        \        P                  RV P                   24       \        P                  R4       M. V n	        V'       g	   V'       dN   V'       g   \        R4      hV P                  '       d   \        R4      hV P                   '       d   \        R4      hV'       d&   V'       d   V P"                  '       g   \%        R	4      hV P"                  '       d   V'       g   \%        R
4      hV P"                  '       d   V P&                  '       g   \%        R4      hV'       Ed   V P(                  '       dT   \*        P,                  P/                  V P(                  4      '       d%   \        P                  RV P(                   24       EM(V P(                  '       gY   V P0                   RV P                   R2p\3        \3        V P4                  4      P6                  V4      P9                  4       V n        \        P                  RV P0                   RV P(                   R24       \;        V 4       MV P(                  '       dC   V P<                  '       d1   \        P                  RV P(                   RV P<                   24       M.\        P                  RV P0                   R24       \?        V 4       RpV P@                  '       g   V P                  \        P                  P                  8X  d   V'       dy   V'       g   V'       g	   V'       da   \        P                  RV P(                   R24       \C        V P(                  V PD                  4      pV'       g   \        PG                  R4       Rp	Rp
V PH                  '       Eg   V'       d   V'       g   V'       g	   V'       d   \        P                  RV P(                   R24       RV P                   R2p\3        \3        V P4                  4      P6                  V4      P9                  4       p
\K        V P(                  V
V PD                  4      p	V	'       g   \        PG                  R4       V	'       d2   \M        V P(                  V PD                  4      '       g   \%        R4      hV'       g   V PN                  '       g	   V	'       dz   \        P                  R V P(                   R24       \Q        V P(                  V PD                  4       V	'       d/   \        P                  R V
 R24       \Q        WPD                  4       V'       d.   \R        PT                  ! V P0                  V PV                  R!7      pMjV P                   R"8X  d.   \X        PT                  ! V P0                  V PV                  R!7      pM,\Z        PT                  ! V P0                  V PV                  R!7      pV P\                  '       d   \        P                  R#V 24       VP^                  pV'       d   VP^                  MVP`                  pVPb                  pV Pb                  Rp8w  d   V Pb                  pV P^                  Rp8w  d   V P^                  pV P`                  Rp8w  d   V P`                  p\d        Pf                  ! V P(                  R$R%7      pV P                    R&2VPh                  n5        RpV P                   R8X  d   \m        VPh                  V P                  4       V	'       dW   \d        Pf                  ! V
R$R%7      pV P                    R'2VPh                  n5        \m        VPh                  V P                  4       M \o        VPh                  V P                  4       RpV'       d   . RqOpMV'       g	   V'       d   . RrOpV Pp                  '       d   VPs                  R/4       MVPs                  R04       V Pt                  '       d   VPs                  R14       MVPs                  R04       V Pv                  '       d   VPs                  R24       MVPs                  R04       V'       dk   V Px                  '       d%   V Pz                  '       d   VPs                  R34       MVPs                  R04       V P|                  '       d   VPs                  R44       R5.pV P                  '       d   VPs                  R64       V P                   '       d+   V P                  '       g   Q R74       hVPs                  R84       RpV'       d2   \d        P~                  P                  R9VVR:V P                    2R;7      pMqV'       d2   \d        P~                  P                  R<VVR=V P                    2R;7      pM8V'       d1   \d        P~                  P                  R>VVR?V P                    2R;7      pR@VnA        RpV'       d   \d        P~                  P                  RAV4      \d        P~                  P                  RBV4      \d        P~                  P                  RCV P                  4      \d        P~                  P                  RDV P                  '       d   ^M^ 4      \d        P~                  P                  REV P                   R8X  d   ^ M^4      .pEM?V'       d   \d        P~                  P                  RAV4      \d        P~                  P                  RBV4      \d        P~                  P                  REV P                   R8X  d   ^ M^4      \d        P~                  P                  RCV P                  4      .pEMV'       Ed   \d        P~                  P                  RAV4      \d        P~                  P                  RBV4      \d        P~                  P                  REV P                   R8X  d   ^ M^4      \d        P~                  P                  RCV P                  4      \d        P~                  P                  RFV P                  4      \d        P~                  P                  RGV P                  4      \d        P~                  P                  RHV P                  4      \d        P~                  P                  RIV P                  4      \d        P~                  P                  RJV Px                  4      \d        P~                  P                  RKV P                  4      .
pV'       d1   VP                  \d        P~                  P                  RLV4      .4       VP                  P                  V4       . pV P                   Rs9   Ed   V PN                  '       dD   \        P                  RMV P<                   R24       \Q        V P<                  V PD                  4       \d        Pf                  ! V P<                  R$R%7      p\        VPh                  P                  4      ^8X  d   RNMROpV P                    RPV 2VPh                  n5        \        VPh                  V P                  4       \        VV4       \        VV4       V'       d   V P"                  '       g   \%        RQ4      h\        P                  RR4       \        VPh                  4      '       d   \        P                  RS4       M\        P                  RT4       \        V4      '       d   \        P                  RU4       M\        P                  RV4       V P                  '       gK   \        VV4      p\        P                  \        V4       RWV Uu. uF  pVPj                  NK  	  up RX24       VP                  ^ 8  g   Q RY4       hVP                  P                  \d        P~                  P                  RNVPh                  4      \d        P~                  P                  RZVPh                  4      \d        P~                  P                  R[VP                  4      .4       EMV	'       Ed   V P                  '       gK   \        VV4      p\        P                  \        V4       RWV Uu. uF  pVPj                  NK  	  up R\24       V'       d+   \        P                  R]4       \        VPh                  4       V P"                  '       d)   \        VPh                  VR4      '       g   \%        R^4      hVP                  Ps                  \d        P~                  P                  R_VPh                  4      4       M6\        VPh                  4      p\        P                  \        V4       R`24       V'       d+   \        P                  Ra4       \        VPh                  4       V P"                  '       d)   \        VPh                  VR$4      '       g   \%        Rb4      hVP                  Ps                  \d        P~                  P                  RZVPh                  4      4       \d        P~                  P                  R(\        P                  RcRd.4      p\d        P~                  P                  R)\        P                  ^.4      p\d        P~                  P                  R*\        P                  ^.4      p\d        P~                  P                  R+\        P                  ^.4      p\d        P~                  P                  R,\        P                  ^.4      p\d        P~                  P                  R-\        P                  ^.4      p\d        P~                  P                  R.\        P                  ^.4      p Rp!V'       d   VVVVVVV .p!MV'       g	   V'       d   VVVV .p!V Pp                  '       dB   \d        P~                  P                  R/\        P                  V.4      p"V!Ps                  V"4       V Pt                  '       dC   \d        P~                  P                  R1\        P                  RcV.4      p#V!Ps                  V#4       V Pv                  '       dC   \d        P~                  P                  R2\        P                  RcRd.4      p$V!Ps                  V$4       V Px                  '       dU   V Pz                  '       dC   \d        P~                  P                  R3\        P                  RcV.4      p%V!Ps                  V%4       V'       dT   V P|                  '       dB   \d        P~                  P                  R4\        P                  ^.4      p&V!Ps                  V&4       Rp'V'       d3   \d        P~                  P                  R5\        P                  . RtO4      p'MAV'       g	   V'       d2   \d        P~                  P                  R5\        P                  RcR).4      p'V'.p(V P                  '       dC   \d        P~                  P                  R6\        P                  RcR,.4      p)V(Ps                  V)4       V P                   '       dE   \d        P~                  P                  R8\        P                  ReRcR+V.4      p*V(Ps                  V*4       \d        P~                  P                  V.V'       g   V P                    Rf2MV P                    Rg2V!V(V4      p+\d        P~                  P                  V+RhVP                  Ri7      p,V PD                  '       dt   ^ RjI^H_p- V-P                  \d        P                  4      V-P                  Rk4      8  d   \        PG                  Rl4       \        P                  ! V,V P4                  R$R$Rm7       M!\d        P                  ! V,V P4                  4       \        P                  RnV P4                   24       R# u upi u upi )uz|Convert model according to command line arguments.

Args:
    args (argparse.Namespace): arguments parsed from command line
rG   z**** past_present_share_buffer=rT   z**** Setting op_block_list to zI**** use --op_block_list if you want to override the block operator list.z<Currently only gpt2 with greedy search/sampling is supportedzLoutput_sequences_scores currently is not supported in greedy search/samplingzHoutput_token_scores currently is not supported in greedy search/samplingzi`use_decoder_masked_attention` MUST be turned on to use `past_present_share_buffer` in case of BeamSearchzS`past_present_share_buffer` MUST be turned on to use `use_decoder_masked_attention`z?`use_decoder_masked_attention` option is only supported on GPUsz)skip convert_to_onnx since path existed: _past_z.onnxzConvert GPT model z	 to onnx z ...z,skip convert_to_onnx since paths specified: z and zConvert model z to onnx ...Fz=Pad logits MatMul weights for optimal MatMul perf in fp16 on z. The file will be overwritten.z]Tried and failed to pad logits MatMul weights. Performance may be sub-optimal for this MatMulNz*Creating an initial run GPT2 decoder from z. gpt2_init_past_zuTried and failed to generate the init decoder GPT2 model. Performance may be sub-optimal for the initial decoding runzGCould not update the input shapes for the non-initial decoder subgraph.z Run symbolic shape inference on r   rq   zConfig=Tr   z decoderz init decoderr  
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyr`   r   rc   r
  re   rf   	sequencessequences_scoresz8--output_token_scores requires --output_sequences_scoresscores
BeamSearchBeamSearch_r   GreedySearchGreedySearch_Sampling	Sampling_r#  eos_token_idpad_token_idno_repeat_ngram_sizer_   r   temperaturetop_pfilter_valuemin_tokens_to_keepcustompresence_penalty
vocab_sizezSymbolic shape inference on r[  zencoder and decoder init zMpast_present_share_buffer is only supported with use_decoder_masked_attentionzl*****update t5 decoder subgraph to share past/present buffer and use decoder_masked_multihead_attention*****z4*****update t5 decoder subgraph successfully!!!*****zF*****DecoderMaskedMultiHeadAttention is not applied to T5 decoder*****z9*****pack qkv for decoder masked mha successfully!!!*****z3*****pack qkv for decoder masked mha failed!!!*****z shared initializers (z>) in encoder and decoder subgraphs are moved to the main graphz%decoder_start_token_id should be >= 0r\  decoder_start_token_idzC) in decoder and init decoder subgraphs are moved to the main graphzY*****update init decoder subgraph to make past and present share buffer******************zLCould not update the init decoder subgraph to use DecoderMaskedSelfAttentioninit_decoderz: initializers from the decoder are moved to the main graphzT*****update decoder subgraph to make past and present share buffer******************zGCould not update the decoder subgraph to use DecoderMaskedSelfAttentionr  r  zmax_length - sequence_lengthz beam searchz greedy searchzonnxruntime.transformers)producer_nameopset_imports)versionz1.12.0z0Require onnx >= 1.12 to save large (>2GB) model!)r   all_tensors_to_one_filezmodel save to )r  r  r  r  rs   )r  r  r  r  r  r  r  )r  r  r  r  rq   rr   )r  r  r  )dr   r    r1   r2   r3   ra   r   r   r   r   r   r   r|   r&   NotImplementedErrorr]   r^   rb   r  rl   r   rx   ry   existsr   r   r   r   as_posixr   r   r   rY   r   rW   r   rZ   r  r  rX   r   r   from_pretrainedr   r   r   rO   r  r  r  r   r   r   r   r  r)  r`   r   rc   rd   r  re   rf   rE  r  rq  ru  r  r_   r  r  r  r  r  r   rn  r   r3  r  r  r  r[   r^  r  r  r  rc  rF  r
   r  r  
make_graph
make_modelopset_import	packagingr  parse__version__r   r   ).r   r  is_gpt2is_beamsearchis_greedysearchis_samplingra   onnx_filenamelogits_matmul_weight_paddedgpt2_init_decoder_generatedgpt2_init_decoder_onnx_pathgpt2_init_decoder_onnx_filenamer  r  r  r  r   r  r  r  r@  attr_to_extendr]  rS  suffixr  r  r  r  r  r  r  r  graph_inputsr`   rc   r
  re   rf   r  graph_outputsr  r  	new_graph	new_modelr  s.   &&                                            r(   convert_generation_modelr!  	  s~    OOv-G)^-F-FFM+~/J/JJO'>+B+BBK&*&D&D
KK12K1LMN
4!#(:(:1(=(Gt~~):):)@)@@"D KK89K9K8LMNKKcd!#D+%&dee'''%&tuu###%&pqq !]4;\;\;\w
 	
 (((1Jnoo (((Z[[w0A0A!B!BKKCDDUDUCVWX$$$#'#:#:";6$..AQQV W$(dkk):)A)A=$Q$Z$Z$\!KK,T-D-D,EYtO`O`Naaefg!?!?!?KK>t?P?P>QQVW[WuWuVvw KK.)@)@(ANOt #('''NNi//555oKDL]L]K^ _, ,	
 'C4CTCTVZVsVs&t#*NNo #("&;;;o@ARAR@SSUVW,;DNN;K5*Q'&*4+<+C+CEd&e&n&n&p#&@'))'
# +NNN '/Yt<<0
 0
 fgg
 #d&>&>&>B]6t7H7H6IIhij))4+H+HI&KK:;V:WWvwx79V9VW++D,C,Ct~~^	D	 ))$*A*AT^^\**4+B+Bdnn]|||gfX&'&&L*16&&v7J7JL""J "__
B((B((OOD$5$5$OM"&//!2(;M"& ]00$..A '&*oo6Qfj&k#48OO3DM1R#)). !8!>!>O"=#6#6GF
 
K
 l#b)*b!!!&'b;;;4---MM/*MM"999MM&!mG###)*+++g-gg+x D{{$$t/0	 % 
 
{{$$  12	 % 
 
{{$$T__-.	 % 
 "DKNKK&&~|DKK&&~|DKK&&'=t?X?XYKK&&'7d>Q>Q>QWXYKK&&|$//V:SQYZ[
 
KK&&~|DKK&&~|DKK&&|$//V:SQYZ[KK&&'=t?X?XY	
 
KK&&~|DKK&&~|DKK&&|$//V:SQYZ[KK&&'=t?X?XYKK&&}d6F6FGKK&&w

;KK&&~t7H7HIKK&&';T=T=TUKK&&x=KK&&'94;P;PQ
 #t{{99,
STUNN.)L-'###KK6t7U7U6VVuvwD::D<Y<YZ(F(F[_`!-"5"5";";<AGa&*oo%6ax#@ /0C0CT^^T!-8!-8 %444 !pqqKK~ O}ObObccRSde.}==WXQR///2=-PLKK|$%%;\<Z\QVV\<Z;[  \Z  [ ,,1Z3ZZ1**9m6I6IJ**9m6I6IJ**+CVEbEbc	
 '& 33367NP]^<())?Q]@^Q]AQ]@^?_  `c  d
 )wxABYB_B_` 0009m'--}e: : !!oppNN!!$++"<"<^MdMjMj"kl -]-@-@ALKK3|,--ghi %KKno=m>Q>QR ,,,5i6
 6
 fggdkk88MDWDWXY 22;@Q@QT`bsStuI33L+BSBSVWUXYJ33L+BSBSVWUXYJ22;@Q@QTUSVWI;;==>TVaVgVgjkilm[[778H+J[J[^_]`aN;;<PR]RcRcfgehiL 
 
K	
 [[77kFWFWZdYef
J' KK>>!2!2\:4N
 	-.!!!;;k//,@Q1R
 	N+{{{t)))::[..z0J
 	M*tyyy{{11&+:K:KaSQD! IKK66@
	
 
KKK66<(
	 KM###;;==12

 	-.33+\;
S

 	V$&&	1@DOOL	)HYYgFhI &&0#00 ' I $$$%==))*W]]8-DDNNMNKK"&$(		
 			)T[[)
KK../k =[8 A_s   ?Aa'DAa,c                   V ^8  d   QhR\         P                  R\        \        ,          R\        P
                  R\        P
                  R\        R\        R\        \        \        ,          ,          R\        \        \        3,          /# )	r8   r   r   r  r
  r  r  bad_words_idsr:   )r=   r>   r   r   torchTensorr}   r;   r;  r<   r   )r?   s   "r(   r@   r@     s|     ?9 ?9


?977?9 ||?9 LL	?9
 ?9 ?9 S	??9 
#s(^?9r+   c                
   V P                   '       d0   \        P                  P                  4       '       g   \	        R4      hV P
                  \        P                  P                  8X  d   VP                  4        \        P                  ! V P                   '       d   RMR4      pVP                  V4       \        P                  ! R4       VP                  V4      pVP                  V4      p. p\        V P                  4       F  p	\        P                  ! 4       p
TP!                  TTV P"                  V P$                  V P&                  V P(                  V P*                  TTV P,                  V P.                  V P0                  V'       d   TMRRV P2                  ;'       g    V P4                  R7      p	VP7                  \        P                  ! 4       V
,
          4       K  	  VP8                  ^ ,          p^ RIHp V! W4      # )	ah  Test PyTorch performance of text generation.

Args:
    args (argparse.Namespace): arguments parsed from command line
    model (Union[GPT2LMHeadModel, T5ForConditionalGeneration]): PyTorch model
    input_ids (torch.Tensor): input_ids
    attention_mask (torch.Tensor): Attention mask
    eos_token_id (int): EOS token ID
    pad_token_id (int): Padding token ID
    bad_words_ids (List[List[int]]): Words shall not be generated.

Raises:
    RuntimeError: PyTorch with CUDA is not available for --use_gpu

Returns:
    Dict[str, Any]: A dictionary with string with metric name, and value can be integer or string.
z=Please install PyTorch with Cuda for testing gpu performance.zcuda:0cpuFNTr  r
  r  r  r  r_   r  r  r  r  r  r  r#  return_dict_in_generateoutput_scoresget_latency_result)rl   r$  cudais_availabler   r   r   r|   r&   halfdevicer  set_grad_enabledr  
total_runstimegenerater  r  r  r_   r  r  r  r  r]   r^   r   rD  benchmark_helperr,  )r   r   r  r
  r  r  r#  r0  torch_latencyr  startr  r,  s   &&&&&&&      r(   test_torch_performancer8    sz   4 |||EJJ3355Z[[~~**000

\\dlll(>F	HHV	5!V$I#&&v.NM4??#		NN)nn..!%!:!:%%!%!:!:..#66+8-d$(66RR$:R:R  
" 	TYY[501' $( #J3m88r+   c                 X   \         P                  ! V P                  \         P                  R 7      p\	        V P                  ^ ,          4       FY  p^ p\	        V P                  ^,          4       F4  pW,          V,          V8X  d   V^ 8X  d   ^ W#,          V&   K+  V^,          pK6  	  K[  	  V# )r   )r   onesrD  int32r  )r  r  r
  r  abs_posrO  s   &&    r(   create_attention_maskr=  9  s    WWY__BHH=N9??1%&yq)*A|A,.7a<'(!!$1	 + ' r+   c                t    V ^8  d   QhR\         P                  R\        \        ,          R,          R\        /# )r8   r   	sentencesN	is_greedy)r=   r>   r;   r<   r   )r?   s   "r(   r@   r@   E  s:     T T


TCy4T Tr+   c                   V P                   R8X  g   Q h\        P                  ! V P                  V P                  R7      pRVn        VP                  Vn        \        P                  ! V P                  V P                  VP                  R7      pVf   . R6OpV! VRRR7      pVR	,          pVR
,          pRpVP                  VRR7      p	V	 U
u. uF  q.NK  	  p	p
V P                  '       d   \        P                  RV	4       M. p	VP                  pVP                  pVP                  pVP                  p. pRpV P                   '       Eg   \#        R74       \#        R4       TP%                  TTV P&                  V P(                  V P*                  V P,                  V P.                  TTV P0                  V P2                  V P4                  V	'       d   T	MRRV P6                  ;'       g    V P8                  R7      p\#        R	V4       \#        R4       \#        RVP:                  4       V P6                  '       d   \#        RVP<                  4       V P8                  '       d   \#        RVP>                  4       \A        VP:                  4       F:  w  ppVPC                  VRR7      pVPE                  V4       \#        V RV 24       K<  	  \#        R74       \#        R4       V'       d   R	VPG                  4       PI                  4       PK                  \L        PN                  4      R\L        PP                  ! V P&                  .\L        PN                  R7      R\L        PP                  ! V P(                  .\L        PN                  R7      R\L        PP                  ! V P4                  .\L        PR                  R7      /pEMcR	VPG                  4       PI                  4       PK                  \L        PN                  4      R\L        PP                  ! V P&                  .\L        PN                  R7      R\L        PP                  ! V P(                  .\L        PN                  R7      R\L        PP                  ! V P*                  .\L        PN                  R7      R\L        PP                  ! V P0                  .\L        PN                  R7      R\L        PP                  ! V P2                  .\L        PR                  R7      R\L        PP                  ! V P4                  .\L        PR                  R7      /pV P                  '       dL   \L        PT                  ! V\L        PN                  R7      pV P                  '       d   V	 F  p^ VV&   K
  	  VVR&   V PV                  '       d   \Y        Wm4      VR
&   VPZ                  ^ ,          pV P\                  '       dC   \        P_                  R4       \L        PT                  ! VV3\L        PN                  R7      pVVR &   V P`                  '       d   \c        V Pd                  4      Pf                  Pi                  4       p\        P                  R!V4       ^ R"I5H6p \        P_                  R#V R$24       V.p\A        V4       F?  w  pp\n        Pp                  Ps                  VR%\u        V4      ,           4      pV! VV4       KA  	  \        P                  R&V4       V Pv                  '       d   R# \        P                  R'4       \y        V Pd                  V Pz                  V P|                  4      p\        P                  R(4       VP                  RV4      p. p\        V P                  4       FU  p\        P                  ! 4       p VP                  RV4      pVPE                  \        P                  ! 4       V ,
          4       KW  	  ^ R)ICHDp! VPZ                  ^ ,          pV!! VV4      p"\#        R*4       V^ ,          p#\#        RV#4       V P6                  '       d   \#        RV^,          4       V P8                  '       d   \#        RV^,          4       V'       da   V#PZ                  w  pp$. p%\        V4       F?  pVPC                  V#V,          RR7      pV%PE                  V4       \#        R+V R,V 24       KA  	  M|V#PZ                  w  pp&p$. p%\        V4       F[  p\        V&4       FI  p'VPC                  V#V,          V',          RR7      pV%PE                  V4       \#        R+V R-V' RV 24       KK  	  K]  	  V'       d   VP:                  P                  VV P0                  R84      p(\        P                  ! V#4      p)\#        R74       \#        R.4       \#        V(4       \#        V4       \#        R74       \#        R/4       \#        V)4       \#        V%4       \#        R74       VV%8H  p*\#        R0V*'       d   R1MR24       V*V"R3&   V P                  '       d   \        V VVVVVV	4      p+\#        R4V+4       \#        R5V"4       V"# u up
i )9a!  Test GPT-2 model

Args:
    args (argparse.Namespace): arguments parsed from command line
    sentences (Optional[List[str]], optional): input text. Defaults to None.

Returns:
    Union[Dict[str, Any], None]: A dictionary with string with metric name, and value can be integer or string.
rG   r  left)r   r  NptTreturn_tensorsr   r  r
  walk in park)add_prefix_spacer#  CTest PyTorch model and beam search with huggingface transformers...r(  !huggingface transformers outputs:r  r  r  skip_special_tokens: 'Testing beam search with onnxruntime...r  r   r  r  r  r  r  r`   zYUse prefix vocab mask with all ones in ORT, but no corresponding setting for Torch model.rc   test_data_diroutput_test_datazSaving test_data to z/test_data_set_* ...test_data_set_
ORT inputszCreating ort session......zRun ort session......r+  ORT outputs:batch z sequence: 
 sequence Torch Sequences:ORT Sequences:zTorch and ORT result issame	differentparityTorch LatencyORT)zThe product is releasedzI enjoy walking in the parkzTest best way to invest2--------------------------------------------------rs   )Jr   r   r  r   r   padding_side	eos_token	pad_tokenr   r  encoder`   r   r   r  r  rm   rt  r4  r  r  r  r_   r  r  r  r  r]   r^   r  r  r  r  decoder   r'  numpyastyper   r;  arrayfloat32r:  rd   r=  rD  rc   r   rp   r   r   r   r
  bert_test_datarP  rx   ry   rw   r<   rn   r   rl   rj   runr  r2  r3  r5  r,  rJ  r$  
LongTensorro   r8  ),r   r?  r@  	tokenizerr   r  r  r
  	bad_wordsr#  word_idr  r  r  r  torch_decoded_sequencesbeam_outputsr  sequencedecoded_sequencer`   bad_word_idr  rc   rN  rP  
all_inputsdirr   resultlatencyr  r7  r,  r   r  r  ort_decoded_sequencesnum_sequencesrO  torch_sequencesort_sequencesis_sametorch_latency_outputs,   &&&                                         r(   test_gpt_modelr|  E  s    ??f$$$--d.E.EQUQ_Q_`I#I#--I++..++E 
	 ytDF{#I,-NI$$Y$FM.;<m7YmM<_m4\\F&&L&&L""J LhST~~)nn..!%!:!:%%!%!:!:..#66+8-d$(66RR$:R:R & 
" 	k9%12k<112'''$l&C&CD###(L//0$\%;%;<KAx(//d/S#**+;<QCr*+,- =
 
(O	
34..077A"((DOO#4BHHE"((DOO#4BHHE "((D,C,C+DBJJ"W	
 ..077A"((DOO#4BHHE"((DOO#4BHHE4>>"2"((C"BHHd.G.G-HPRPXPX$Ybhh(;(;'<BJJO "((D,C,C+DBJJ"W
 WWj:
???,*+
;'  -)|!!!#8#Q #JopGGZ$<BHHM&7"#T[[)0099;_m43*=/9MNOX
":.IAv'',,}.>Q.GHCS&) / LLv&
LL-.$T[[$,,@X@XYK
LL()__T6*F G4??#		OOD&)tyy{U*+ $
 4#J4F	.q	I	+y!### &),hq	"#,?? Z "z"A(//	!RV/W!(()9:F1#[)9(:;< #
 3<///]J "z"A=)#,#3#3IaLOY]#3#^ %,,-=>qcA3b1A0BCD * # &0088TE^E^`bc((3h !o%&hm#$h)-BB'7L"x5 
 	o34	%MY =s   h<c                h    V ^8  d   QhR\         P                  R\        \        ,          R,          /# )r8   r   r?  N)r=   r>   r;   r<   )r?   s   "r(   r@   r@     s-     z z** ztCy47G zr+   c                B   V P                   R39   g   Q hV P                  '       d   \        P                  R4       R# \        P
                  ! V P                  V P                  R7      pRVn        V P                   R8X  d.   \        P
                  ! V P                  V P                  R7      pM,\        P
                  ! V P                  V P                  R7      pVf   RR.pV! VRR	R
7      pVR,          pVR,          pRpVP                  V4      RR4 pV U	u. uF  q.NK  	  pp	V P                  '       d   \        P                  RV4       M. pVP                  p
V
P                  pV
P                  pV
P                   p\        P                  RV RV RV 24       . pV P"                  '       Eg   \%        R54       \%        R4       TP'                  TTV P(                  V P*                  V P,                  V P.                  V P0                  TTV P2                  V P4                  V P6                  V'       d   TMRR	V P8                  ;'       g    V P:                  R7      p\%        RV4       \%        R4       \%        RVP<                  4       V P8                  '       d   \%        RVP>                  4       V P:                  '       d   \%        RVP@                  4       \C        VP<                  4       F:  w  ppVPE                  VR	R7      pVPG                  V4       \%        V RV 24       K<  	  \%        R54       \%        R4       \H        PJ                  ! V\H        PL                  R7      pV P                  '       d   V F  p^ VV&   K
  	  RVPO                  4       PQ                  4       PS                  \H        PL                  4      R\H        PT                  ! V P(                  .\H        PL                  R7      R\H        PT                  ! V P*                  .\H        PL                  R7      R\H        PT                  ! V P,                  .\H        PL                  R7      R\H        PT                  ! V P2                  .\H        PL                  R7      R \H        PT                  ! V P4                  .\H        PV                  R7      R!\H        PT                  ! V P6                  .\H        PV                  R7      /pV P                  '       d   VVR"&   V PX                  '       d   \[        W\4      VR&   V P\                  '       d   \_        V P`                  4      Pb                  Pe                  4       p\        P                  R#V4       ^ R$I3H4p V.p\C        V4       F?  w  pp\j        Pl                  Po                  VR%\q        V4      ,           4      pV! VV4       KA  	  \        P                  R&V4       \s        V P`                  V Pt                  V Pv                  4      p. p\y        V Pz                  4       FU  p\|        P|                  ! 4       pVP                  RV4      pVPG                  \|        P|                  ! 4       V,
          4       KW  	  VP                  ^ ,          p^ R'IAHBp V! VV4      p \%        R(4       X^ ,          p!\%        RV!4       V P8                  '       d   \%        RV^,          4       V P:                  '       d   \%        RV^,          4       V!P                  w  pp"p#. p$\y        V4       F[  p\y        V"4       FI  p%VPE                  V!V,          V%,          R	R7      pV$PG                  V4       \%        R)V R*V% RV 24       KK  	  K]  	  V P"                  '       g   XP<                  P                  VV P2                  R44      p&\        P                  ! V!4      p'\%        R54       \%        R+4       \%        V&4       \%        V4       \%        R54       \%        R,4       \%        V'4       \%        V$4       \%        R54       VV$8H  p(\%        R-V('       d   R.MR/4       V(V R0&   V P                  '       d   \        V VVVVVV4      p)\%        R1V)4       \%        R2V 4       V # u up	i )6a%  Test T5 or MT5 model

Args:
    args (argparse.Namespace): arguments parsed from command line
    sentences (Optional[List[str]], optional): input text. Defaults to None.

Returns:
    Union[Dict[str, Any], None]: A dictionary with string with metric name, and value can be integer or string.
rq   zLSkipping parity test as prefix vocab mask is not implemented by Hugging FaceNr  rB  z4translate English to French: The product is releasedzsummarize: research continues to show that pets bring real health benefits to their owners. Having a dog around can lead to lower levels of stress for both adults and kids.rC  TrD  r  r
  rF  r#  zeos_token_id:z, pad_token_id:z, vocab_size:rH  r(  rI  r  r  r  rJ  rL  rM  r   r  r  r  r  r  r  r`   rN  rO  rQ  rR  r+  rS  rT  rU  rV  rW  zTorch and ORT result is rX  rY  rZ  r[  r\  r  rs   r]  )Hr   rc   r   r   r   r  r   r   r^  r   r   ra  r`   r  r  r  r  rm   rt  r4  r  r  r  r_   r  r  r  r  r]   r^   r  r  r  r  rb  r   r   r:  r;  r'  rc  rd  re  rf  rd   r=  rp   r   r   r   r
  rg  rP  rx   ry   rw   r<   r   rl   rj   r  r2  r3  rh  rD  r5  r,  rJ  r$  ri  ro   r8  )*r   r?  rj  r   r  r  r
  rk  r#  rl  r  r  r  r  rm  rn  r  ro  rp  r`   rq  rN  rP  rr  rs  r   ru  r  r7  rt  r  r,  r   r  rw  r  rv  rO  rx  ry  rz  r{  s*   &&                                        r(   test_t5_modelr    s    ??m+++cd++D,C,Ct~~^I#I$*::##nn

 ,;;##nn
 B {
	 ytDF{#I,-NI$$Y/4M.;<m7YmM<_m4\\F&&L&&L""J
LL=ol^=YcXdef hST~~)nn..!%!:!:%%!%!:!:..#66+8-d$(66RR$:R:R & 
$ 	k9%12k<112'''$l&C&CD###(L//0$\%;%;<KAx(//d/S#**+;<QCr*+,- =
 
(O	
34*RXX6J(K&'J{# ) 	Y]]_**,33BHH=bhh0Abhh0ARXXt~~.bhh?$*C*C)DBHH U"((D$7$7#8

Kbhh(?(?'@

SF )|!!!#8#Q T[[)0099;_m43X
":.IAv'',,}.>Q.GHCS&) / LLv&$T[[$,,@X@XYK G4??#		v.tyy{U*+ $ #J34F	.q	I	+y!### &),hq	".7oo+Z
:}%A(//	!QUY/Z!(()9:F1#Zs"-=,>?@ &  &0088TE^E^`bc((3h !o%&hm#$h)-BB(G&M"x5 
 	o34	%M[ =s   bc                x    V ^8  d   QhR\         \        ,          R,          R\         \        ,          R,          /# )r8   r9   Nr?  )r;   r<   )r?   s   "r(   r@   r@     s,     8 8tCy4 849t3C 8r+   c                   \        V 4      p\        VP                  4       VP                  R9   Ed   VP                  '       dH   \
        P                  P                  VP                  4      '       g   \        RVP                   24      hVP                  '       dH   \
        P                  P                  VP                  4      '       g   \        RVP                   24      hVP                  '       d   VP                  '       d%   VP                  '       d   VP                  '       g   \        R4      hVP                  ^8H  ;'       d    VP                  ^8H  pVP                  R8X  d   V'       d   VP                  R8  dy   VP                  R8  dh   \        V\        P                  4       \         P#                  R4       VP                  R8  g%   VP$                  '       g   VP&                  '       d   R	# M&\        V\        P(                  4       M\        V4       \         P#                  R
4       VP                  R9   d   \+        W!R7      pM\-        W!VR7      pV'       dg   VP.                  '       d3   \         P#                  RVP0                   RVP0                   R24       V# \         P#                  RVP0                   24       V# )a  Main entry function

Args:
    argv (Optional[List[str]], optional): _description_. Defaults to None.
    sentences (Optional[List[str]], optional): input text. Defaults to None.

Raises:
    ValueError: Path does not exist: --encoder_decoder_init_onnx
    ValueError: Path does not exist: --decoder_onnx
    ValueError: --decoder_onnx and --encoder_decoder_init_onnx are not used together for T5

Returns:
    Union[Dict[str, Any], None]: A dictionary with string with metric name, and value can be integer or string.
z1Path does not exist: --encoder_decoder_init_onnx z$Path does not exist: --decoder_onnx zB--decoder_onnx shall use together with --encoder_decoder_init_onnxrG   ri   rg   zThe test for gpt2_sampling onnx model is limited to non-custom model with small top_p(e.g <=0.01) value. The result should be the same as gpt2 greedy search.g{Gz?Nzstart testing model...)r?  )r?  r@  zOutput files: rC   z.datazOutput file: r  )r   r   rO   r   r   rx   ry   r	  r  r   r  r  r  r!  r    r3   r   r   r  rf   r2   r  r|  rW   r   )r9   r?  r   r@  rt  s   &&   r(   r   r     s     4 D-')))"''..A_A_2`2`PQUQoQoPpqrrRWW^^D4E4E%F%FCDDUDUCVWXX***43D3D3Dd&D&D&Dabb!#FF(A(AQ(FI& Y::

S 0$T>+B+BCKK p zzD DKKK4999 4= %T>+F+FG &
KK()-'t9YO(((KK.R}EJK M KK-}56Mr+   __main__r%   )T)shared_   NN)r  )r  r  rs   )NF)NN)Z__doc__r=   loggingr   rx   r3  enumr   pathlibr   typingr   rc  r   r   r$  r5  r   r   fusion_utilsr   r   r	   r
   r  r   transformersr   r   r   r   r   r   r   r   onnxruntimer   r   r   r   4onnxruntime.transformers.models.gpt2.convert_to_onnxr   r   0onnxruntime.transformers.models.gpt2.gpt2_helperr   2onnxruntime.transformers.models.t5.convert_to_onnxr   r   ,onnxruntime.transformers.models.t5.t5_helperr   r   	getLoggerr   r    r   r   r   r   r   r   r  r)  r3  rQ  r^  rc  ro  rv  r~  r  r  r  r  r  r  r)  rp  r  r  r  r  r  r  r1   r!  r8  r=  r|  r  r-   r,   r+   r(   <module>r     s  
#J    	        4 $ 4 4  	 	 	  T
 
		2	T Qh*)Z!<O$K\B5pIoXYaxg!T$>"#Ji.bSlf3R* 9:WY 1hf$RDNk\&fR\~8v@hV	"0N '5&?&?y0x?9D	Tnzz8v zF r+   