+
    9iY                        ^ RI t ^ RIt^ RIt^ RIHt ^ RIt^ RIHtH	t	H
t
 ^ RIHt R R ltR R ltR	 R
 ltRR R lltR R ltR R ltRR R lltR tRR R lltRR R lltR tR R ltR t]R8X  d
   ]! 4        R# R# )     N)Path)
ModelProtoTensorProtonumpy_helper)	OnnxModelc          
      h    V ^8  d   QhR\         R\        R\        R\        R\        P                  /# )   	input_ids
batch_sizesequence_lengthdictionary_sizereturnr   intnpndarray)formats   "e/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/bert_test_data.py__annotate__r      s8      (+>ATWZZ    c                8   V P                   P                  P                  \        P                  \        P
                  \        P                  39   g   Q h\        P                  P                  W1V3\        P                  R7      pV P                   P                  P                  \        P                  8X  d   \        P                  ! V4      pV# V P                   P                  P                  \        P                  8X  d   \        P                  ! V4      pV# )a@  Create input tensor based on the graph input of input_ids

Args:
    input_ids (TensorProto): graph input of the input_ids input tensor
    batch_size (int): batch size
    sequence_length (int): sequence length
    dictionary_size (int): vocabulary size of dictionary

Returns:
    np.ndarray: the input tensor created
)sizedtype)typetensor_type	elem_typer   FLOATINT32INT64r   randomrandintint32float32int64)r
   r   r   r   datas   &&&& r   fake_input_ids_datar&      s     >>%%//4    99_3PXZX`X`aD~~!!++{/@/@@zz$ K 
	#	#	-	-1B1B	Bxx~Kr   c                \    V ^8  d   QhR\         R\        R\        R\        P                  /# )r	   segment_idsr   r   r   r   )r   s   "r   r   r   1   s0      {  VY ^`^h^h r   c                $   V P                   P                  P                  \        P                  \        P
                  \        P                  39   g   Q h\        P                  ! W3\        P                  R7      pV P                   P                  P                  \        P                  8X  d   \        P                  ! V4      pV# V P                   P                  P                  \        P                  8X  d   \        P                  ! V4      pV# )a  Create input tensor based on the graph input of segment_ids

Args:
    segment_ids (TensorProto): graph input of the token_type_ids input tensor
    batch_size (int): batch size
    sequence_length (int): sequence length

Returns:
    np.ndarray: the input tensor created
r   )r   r   r   r   r   r   r   r   zerosr"   r#   r$   )r(   r   r   r%   s   &&& r   fake_segment_ids_datar,   1   s     ''116    88Z1BD##--1B1BBzz$ K 
			%	%	/	/;3D3D	Dxx~Kr   c                0    V ^8  d   QhR\         R\         /# )r	   max_sequence_lengthaverage_sequence_length)r   )r   s   "r   r   r   L   s     B B3 B Br   c                     V^8  d   W8:  g   Q h^V,          V 8  d&   \         P                  ! ^V,          V ,
          V 4      # \         P                  ! ^^V,          ^,
          4      # )   )r    r!   )r.   r/   s   &&r   get_random_lengthr2   L   sa    "a',C,ZZZ 	""%88~~a"99<OOQdee~~a%<!<q!@AAr   c                    V ^8  d   QhR\         R\        R\        R\        R\        R\        R\        P                  /# )r	   
input_maskr   r   r/   random_sequence_length	mask_typer   )r   r   boolr   r   )r   s   "r   r   r   V   s[     F FFF F !	F
 !F F ZZFr   c                    V P                   P                  P                  \        P                  \        P
                  \        P                  39   g   Q hV^8X  dh   \        P                  ! V\        P                  R7      pV'       d"   \        V4       F  p\        W#4      Wg&   K  	  EM?\        V4       F  pW6V&   K	  	  EM'V^8X  d   \        P                  ! W3\        P                  R7      pV'       d8   \        V4       F&  p\        W#4      p\        V4       F	  p	^WgV	3&   K  	  K(  	  EM\        P                  ! W3\        P                  R7      p
WRV
P                  ^ ,          1RV
P                  ^,          13&   EMeV^8X  g   Q h\        P                  ! V^,          ^,           \        P                  R7      pV'       d   \        V4       F  p\        W#4      Wg&   K  	  \        V^,           4       F  pV^ 8  d+   WaV,           ^,
          ,          Wg^,
          ,          ,           M^ WaV,           &   V^ 8  d+   WaV,           ^,
          ,          Wg^,
          ,          ,           M^ V^V,          ^,           V,           &   K  	  M`\        V4       F  pW6V&   K	  	  \        V^,           4       F4  pWs,          WaV,           &   Ws,          V^V,          ^,           V,           &   K6  	  V P                   P                  P                  \        P                  8X  d   \        P                  ! V4      pV# V P                   P                  P                  \        P                  8X  d   \        P                  ! V4      pV# )a  Create input tensor based on the graph input of segment_ids.

Args:
    input_mask (TensorProto): graph input of the attention mask input tensor
    batch_size (int): batch size
    sequence_length (int): sequence length
    average_sequence_length (int): average sequence length excluding paddings
    random_sequence_length (bool): whether use uniform random number for sequence length
    mask_type (int): mask type - 1: mask index (sequence length excluding paddings). Shape is (batch_size).
                                 2: 2D attention mask. Shape is (batch_size, sequence_length).
                                 3: key len, cumulated lengths of query and key. Shape is (3 * batch_size + 2).

Returns:
    np.ndarray: the input tensor created
r*   N)r   r   r   r   r   r   r   r   onesr"   ranger2   r+   shaper#   r$   )r4   r   r   r/   r5   r6   r%   iactual_seq_lenjtemps   &&&&&&     r   fake_input_mask_datar@   V   s   0 ??&&005    A~ww
2884!:&+OU ' :&1Q '	axx5RXXF!:&!2?!\~.A!"DAJ / '
 77J@QD594::a=/DJJqM/12A~~xxa!+BHH=!:&+OU ' :>*QRUVQVtNQ,>'?$1u+'M\]!^$YZ]^Y^tNQ4F/G$STu+/UdeQ^a'!+, + :&1Q ':>*'('B!^$/0/JQ^a'!+, + "",,0A0AAzz$ K 
	$	$	.	.+2C2C	Cxx~Kr   c                f    V ^8  d   QhR\         R\        \         \        P                  3,          /# )r	   	directoryinputs)strdictr   r   )r   s   "r   r   r      s'     3 3 3T#rzz/-B 3r   c           	     V   \         P                  P                  V 4      '       g(    \         P                  ! V 4       \	        RV  R24       M\	        RV  R24       \        VP                  4       4       F|  w  pw  r4\        P                  ! WC4      p\        \         P                  P                  V RV R24      R	4      ;_uu_ 4       pVP                  VP                  4       4       R
R
R
4       K~  	  R
#   \
         d    \	        RT  R24        Li ; i  + '       g   i     K  ; i)zOutput input tensors of test data to a directory

Args:
    directory (str): path of a directory
    inputs (Dict[str, np.ndarray]): map from input name to value
z#Successfully created the directory  zCreation of the directory z failedzWarning: directory z$ existed. Files will be overwritten.input_.pbwbN)ospathexistsmkdirprintOSError	enumerateitemsr   
from_arrayopenjoinwriteSerializeToString)rB   rC   indexnamer%   tensorfiles   &&     r   output_test_datar\      s     77>>)$$	FHHY 7	{!DE#I;.RST(8|((4"'',,yF5'*=>EEJJv//12 FE  9  	C.ykAB	C FEEs   C7 	 D7DDD(c                    V ^8  d   QhR\         R\         R\         R\         R\        R\         R\        R\        R	\        R
\         R\        R\         /# )r	   r   r   
test_casesr   verboserandom_seedr
   r(   r4   r/   r5   r6   r   r7   r   )r   s   "r   r   r      s     7 777 7 	7
 7 7 7 7 7 !7 !7 7r   c           	        Vf   Q h\         P                  P                  V4       \        P                  ! V4       . p\        V4       F  p\	        W`W4      pVP
                  V/pV'       d   \        WpV4      WP
                  &   V'       d   \        WWW4      WP
                  &   V'       d   \        V4      ^ 8X  d   \        RV4       VP                  V4       K  	  V# )a  Create given number of input data for testing

Args:
    batch_size (int): batch size
    sequence_length (int): sequence length
    test_cases (int): number of test cases
    dictionary_size (int): vocabulary size of dictionary for input_ids
    verbose (bool): print more information or not
    random_seed (int): random seed
    input_ids (TensorProto): graph input of input IDs
    segment_ids (TensorProto): graph input of token type IDs
    input_mask (TensorProto): graph input of attention mask
    average_sequence_length (int): average sequence length excluding paddings
    random_sequence_length (bool): whether use uniform random number for sequence length
    mask_type (int): mask type 1 is mask index; 2 is 2D mask; 3 is key len, cumulated lengths of query and key

Returns:
    List[Dict[str,numpy.ndarray]]: list of test cases, where each test case is a dictionary
                                   with input name as key and a tensor as value
zExample inputs)r   r    seedr:   r&   rY   r,   r@   lenrO   append)r   r   r^   r   r_   r`   r
   r(   r4   r/   r5   r6   
all_inputs
_test_caseinput_1rC   s   &&&&&&&&&&&&    r   fake_test_datari      s    D    IINN;
KKJJ'
%i_^..'*'<[Ve'fF##$&:Rh'F??# s:!+"F+&! ( r   c                    V ^8  d   QhR\         R\         R\         R\         R\        R\        R\        R\        R	\         R
\        R\         R\         /# )r	   r   r   r^   rc   r_   r
   r(   r4   r/   r5   r6   r   ra   )r   s   "r   r   r      s     1 111 1 	1
 1 1 1 1 !1 !1 1 1r   c                h    \        V VVVVVVVVVV	V
4      p\        V4      V8w  d   \        R4       V# )au  Create given number of input data for testing

Args:
    batch_size (int): batch size
    sequence_length (int): sequence length
    test_cases (int): number of test cases
    seed (int): random seed
    verbose (bool): print more information or not
    input_ids (TensorProto): graph input of input IDs
    segment_ids (TensorProto): graph input of token type IDs
    input_mask (TensorProto): graph input of attention mask
    average_sequence_length (int): average sequence length excluding paddings
    random_sequence_length (bool): whether use uniform random number for sequence length
    mask_type (int): mask type 1 is mask index; 2 is 2D mask; 3 is key len, cumulated lengths of query and key

Returns:
    List[Dict[str,numpy.ndarray]]: list of test cases, where each test case is a dictionary
                                   with input name as key and a tensor as value
z$Failed to create test data for test.)ri   rd   rO   )r   r   r^   rc   r_   r
   r(   r4   r/   r5   r6   r   rf   s   &&&&&&&&&&&& r   generate_test_datarl      sP    B  J :*$45r   c                    V\        VP                  4      8  d   R # VP                  V,          pV P                  V4      pVfI   V P                  W4      pVe4   VP                  R8X  d#   V P                  VP                  ^ ,          4      pV# )NCast)rd   inputfind_graph_input
get_parentop_type)
onnx_model
embed_nodeinput_indexro   graph_inputparent_nodes   &&&   r   get_graph_input_from_embed_noderx   $  s    c***++[)E--e4K ++JD"{':':f'D$55k6G6G6JKKr   c                   V ^8  d   QhR\         R\        R,          R\        R,          R\        R,          R\        \        P                  R,          \        P                  R,          \        P                  R,          3,          /# )r	   rs   input_ids_nameNsegment_ids_nameinput_mask_namer   )r   rD   tupler   r   )r   s   "r   r   r   1  s{     YX YXYX$JYX DjYX 4Z	YX
 2::bjj4/d1BBCYXr   c                   V P                  4       pVe   V P                  V4      pVf   \        RV 24      hRpV'       d$   V P                  V4      pVf   \        RV 24      hRpV'       d$   V P                  V4      pVf   \        RV 24      h^V'       d   ^M^ ,           V'       d   ^M^ ,           p\        V4      V8w  d   \        RV R\        V4       24      hWVV3# \        V4      ^8w  d   \        R\        V4       24      hV P	                  R4      p	\        V	4      ^8X  ds   V	^ ,          p
\        W
^ 4      p\        W
^4      p\        W
^4      pVf/   V F(  pVP                  P                  4       pRV9   g   K&  TpK*  	  Vf   \        R4      hWVV3# RpRpRpV F<  pVP                  P                  4       pRV9   d   TpK(  R	V9   g   R
V9   d   TpK:  TpK>  	  V'       d   V'       d   V'       d   WVV3# \        R4      h)a  Find graph inputs for BERT model.
First, we will deduce inputs from EmbedLayerNormalization node.
If not found, we will guess the meaning of graph inputs based on naming.

Args:
    onnx_model (OnnxModel): onnx model object
    input_ids_name (str, optional): Name of graph input for input IDs. Defaults to None.
    segment_ids_name (str, optional): Name of graph input for segment IDs. Defaults to None.
    input_mask_name (str, optional): Name of graph input for attention mask. Defaults to None.

Raises:
    ValueError: Graph does not have input named of input_ids_name or segment_ids_name or input_mask_name
    ValueError: Expected graph input number does not match with specified input_ids_name, segment_ids_name
                and input_mask_name

Returns:
    Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]: input tensors of input_ids,
                                                                             segment_ids and input_mask
Nz Graph does not have input named zExpect the graph to have z inputs. Got z'Expect the graph to have 3 inputs. Got EmbedLayerNormalizationmaskz#Failed to find attention mask inputtokensegmentz?Fail to assign 3 inputs. You might try rename the graph inputs.)'get_graph_inputs_excluding_initializersrp   
ValueErrorrd   get_nodes_by_op_typerx   rY   lower)rs   rz   r{   r|   graph_inputsr
   r(   r4   expected_inputsembed_nodesrt   ro   input_name_lowers   &&&&         r   find_bert_inputsr   1  s2   4 EEGL!//?	??OPQQ$556FGK" #CDTCU!VWW
#44_EJ! #COCT!UVVKqQ7
1PQR|/88IWZ[gWhVijkkz11
<AB3|CTBUVWW112KLK
;1 ^
3JAN	5jaP4ZQO
%#(::#3#3#5 --!&J & BCCz11 IKJ ::++-%%J''98H+HKI  [Zz11
V
WWr   c                   V ^8  d   QhR\         R\         R,          R\         R,          R\         R,          R\        \        P                  R,          \        P                  R,          \        P                  R,          3,          /# )r	   	onnx_filerz   Nr{   r|   r   )rD   r}   r   r   )r   s   "r   r   r     st     [ [[$J[ Dj[ 4Z	[
 2::bjj4/d1BBC[r   c                    \        4       p\        V R4      ;_uu_ 4       pVP                  VP                  4       4       RRR4       \	        V4      p\        WaW#4      #   + '       g   i     L'; i)a  Find graph inputs for BERT model.
First, we will deduce inputs from EmbedLayerNormalization node.
If not found, we will guess the meaning of graph inputs based on naming.

Args:
    onnx_file (str): onnx model path
    input_ids_name (str, optional): Name of graph input for input IDs. Defaults to None.
    segment_ids_name (str, optional): Name of graph input for segment IDs. Defaults to None.
    input_mask_name (str, optional): Name of graph input for attention mask. Defaults to None.

Returns:
    Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]: input tensors of input_ids,
                                                                             segment_ids and input_mask
rbN)r   rT   ParseFromStringreadr   r   )r   rz   r{   r|   modelr[   rs   s   &&&&   r   get_bert_inputsr     sV    ( LE	i		$diik* 
 5!JJ8HZZ	 
	s    AA.	c                  v   \         P                  ! 4       p V P                  R R\        RR7       V P                  RR\        RRR7       V P                  R	R\        ^R
R7       V P                  RR\        ^RR7       V P                  RR\        RRR7       V P                  RR\        RRR7       V P                  RR\        RRR7       V P                  RR\        ^RR7       V P                  RR\        ^RR7       V P                  RRRRR7       V P                  RR7       V P                  RRRRR7       V P                  RR7       V P                  RR R)\        R!R"7       V P                  R#R$RRR%R7       V P                  RR&7       V P                  R'R\        ^R(R7       V P                  4       pV# )*z--modelTzbert onnx model path.)requiredr   helpz--output_dirFNz4output test data path. Default is current directory.)r   r   defaultr   z--batch_sizezbatch size of inputz--sequence_lengthz maximum sequence length of inputz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz	--samplesz$number of test cases to be generatedz--seedzrandom seedz	--verbose
store_truezprint verbose information)r   actionr   )r_   z--only_input_tensorsz-only save input tensors and no output tensors)only_input_tensorsz-az--average_sequence_lengthz)average sequence length excluding padding)r   r   r   z-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r5   z--mask_typez^mask type: (1: mask index, 2: raw 2D mask, 3: key lengths, cumulated lengths of query and key))argparseArgumentParseradd_argumentrD   r   set_defaults
parse_args)parserargss     r   parse_argumentsr     s'   $$&F
	DsAXY
C   S!Rgh
/   '   )   ,   3   5sAMZ
(	   &
<	   51
#8   "B   u5
m   DKr   c                    V ^8  d   QhR\         R\         R\        R\        R\        R\        R\        R\         R	,          R
\         R	,          R\         R	,          R\        R\        R\        R\        /# )r	   r   
output_dirr   r   r^   rc   r_   rz   Nr{   r|   r   r/   r5   r6   )rD   r   r7   )r   s   "r   r   r     s     I> I>I>I> I> 	I>
 I> I> I> $JI> DjI> 4ZI> I> !I> !I> I>r   c                   \        WW4      w  rp\        VVVVVVVVVVV4      p\        V4       FB  w  pp\        P                  P                  VR\        V4      ,           4      p\        VV4       KD  	  V
'       d   R# ^ RIpRVP                  4       9   d   RR.MR.pVP                  V VR7      pVP                  4        Uu. uF  pVP                  NK  	  pp\        V4       F  w  pp\        P                  P                  VR\        V4      ,           4      pVP                  VV4      p\        V4       F  w  pp\        P                  ! \         P"                  ! VV,          4      V4      p\%        \        P                  P                  VRV R24      R4      ;_uu_ 4       pVP'                  VP)                  4       4       RRR4       K  	  K  	  R# u upi   + '       g   i     K  ; i)	a	  Create test data for a model, and save test data to a directory.

Args:
    model (str): path of ONNX bert model
    output_dir (str): output directory
    batch_size (int): batch size
    sequence_length (int): sequence length
    test_cases (int): number of test cases
    seed (int): random seed
    verbose (bool): whether print more information
    input_ids_name (str): graph input name of input_ids
    segment_ids_name (str): graph input name of segment_ids
    input_mask_name (str): graph input name of input_mask
    only_input_tensors (bool): only save input tensors,
    average_sequence_length (int): average sequence length excluding paddings
    random_sequence_length (bool): whether use uniform random number for sequence length
    mask_type(int): mask type
test_data_set_NCUDAExecutionProviderCPUExecutionProvider)	providersoutput_rI   rJ   )r   rl   rQ   rK   rL   rU   rD   r\   onnxruntimeget_available_providersInferenceSessionget_outputsrY   runr   rS   r   asarrayrT   rV   rW   )r   r   r   r   r^   rc   r_   rz   r{   r|   r   r/   r5   r6   r
   r(   r4   rf   r<   rC   rB   r   r   sessionoutputoutput_namesresultoutput_nametensor_resultr[   s   &&&&&&&&&&&&&&                r   create_and_save_test_datar     s   D *9P`)r&IJ#J z*	6GGLL-=A-FG	F+ +  #k&I&I&KK 
!"89$% 
 **5I*FG.5.A.A.CD.CFFKK.CLDz*	6GGLL-=A-FG	\62'5NA{(33BJJvay4I;WMbggll9s#.>?FF$

=::<= GF 6 + E GFFs   G( G	G0c                     \        4       p V P                  ^ 8:  d   V P                  V n        V P                  pVfZ   \	        V P
                  4      p\        P                  P                  VP                  RV P                   RV P                   24      pVe    \	        V4      pVP                  RRR7       M\        R4       \        V P
                  VV P                  V P                  V P                  V P                  V P                   V P"                  V P$                  V P&                  V P(                  V P                  V P*                  V P,                  4       \        RV4       R# )r   Nbatch__seq_T)parentsexist_okz7Directory existed. test data files will be overwritten.z Test data is saved to directory:)r   r/   r   r   r   r   rK   rL   rU   parentr   rN   rO   r   samplesrc   r_   rz   r{   r|   r   r5   r6   )r   r   prL   s       r   mainr   Y  s   D##q('+';';$JWW\\!((fT__4EU4K_K_J`,ab
J

4$
/GH

		$$##" 

,j9r   __main__)r	   )i'  )NNN)r   rK   r    pathlibr   numpyr   onnxr   r   r   rs   r   r&   r,   r2   r@   r\   ri   rl   rx   r   r   r   r   r   __name__ r   r   <module>r      s     	    6 6  <6BFR3.7t1h
YXx[8aHI>X$:N zF r   