+
    9i                         ^ RI Ht ^ RIt^ RIHt ^ RIHt ^ RIH	t	H
t
 ^ RIHtHtHtHt ^ RIHt ]! ]4      t ! R R	4      t ! R
 R]4      tR# )    )	getLoggerN)FusionAttentionMaskFormat)FusionUtilsNumpyHelper)	NodeProtoTensorProtohelpernumpy_helper	OnnxModelc                   f   a  ] tR t^t o RtV 3R lR ltV 3R lR ltR tR tV 3R lR	 lt	R
t
V tR# )AttentionMask2
Fuse Attention subgraph into one Attention node.
c                    < V ^8  d   QhRS[ /# )   modelr   )format__classdict__s   "g/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/fusion_attention.py__annotate__AttentionMask.__annotate__   s     7 7i 7    c                    Wn         / V n        / V n        \        V4      V n        \
        P                  V n        VP                  4       V n	        R # N)
r   mask_indicemask_castedr   utilsr   MaskIndexEndmask_formatget_opset_versionopset_version)selfr   s   &&r   __init__AttentionMask.__init__   sB    
 '
.;;"446r   c                    < V ^8  d   QhRS[ /# )r   r!   r   )r   r   s   "r   r   r       s     ' '+> 'r   c                    Wn         R # r   )r!   )r$   r!   s   &&r   set_mask_formatAttentionMask.set_mask_format    s    &r   c                t    WP                   9   d   W P                   V,          8X  g   Q hW P                   V&   R # r   )r   )r$   mask
mask_indexs   &&&r   set_mask_indiceAttentionMask.set_mask_indice#   s3    ###!1!1$!7777!+r   c                v    \        V P                  4      ^ 8  g   Q h\        \        V P                  4      4      # r   )lenr   nextiter)r$   s   &r   get_first_maskAttentionMask.get_first_mask(   s1    4##$q(((D))*++r   c                4   < V ^8  d   QhRS[ RS[ R,          /# )r   mask_2dreturnNstr)r   r   s   "r   r   r   ,   s     8 8C 8C$J 8r   c           
        V P                   \        P                  8X  d   R # WP                  9   d   V P                  V,          # V P                  P                  V4      '       d   V P                  P                  V4      w  r#MV P                  P                  V4      w  r4RpV'       d   W0P                  V&   V P                   \        P                  8X  d   W0P                  V&   V# V P                  P                  R4      pV P                  ^8  d   \        P                  ! RV.V.V P                  P                  RR4      R7      pVP                  P!                  \        P"                  ! R^.4      \        P"                  ! R^ 4      .4       MRpV P                  P%                  V4      fE   V P                  P'                  \        P(                  ! V\*        P,                  ^.^.R	R
7      4       \        P                  ! RW7.V.V P                  P                  RR4      R7      pVP                  P!                  \        P"                  ! R^ 4      .4       V P                  P/                  V4       WPP                  V&   V# )NTr-   	ReduceSumMaskReduceSuminputsoutputsnameaxeskeepdimsort_const_1_reduce_sum_axesFrB   	data_typedimsvalsraw)r!   r   NoMaskr   r   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32r   r   create_node_namer#   r   	make_node	attributeextendmake_attributeget_initializeradd_initializermake_tensorr
   INT64add_node)r$   r8   casted
input_name
_cast_nodeoutput_namemask_index_node	axes_names   &&      r   process_maskAttentionMask.process_mask,   s$   2999&&&##G,, ::&&w//!%!E!Eg!NFJ%)ZZ%C%CG%L"JF(2W% 2@@@(2W% jj11,?"$.."|$ZZ00oN	O %%,,f.C.CFQC.PRXRgRghrtuRv-wx 6Izz)))4<

**&&&"-"3"3SS! %..".$ZZ00oN	O %%,,f.C.CJPQ.R-ST

O,$/!r   )r   r!   r   r   r#   r   N)__name__
__module____qualname____firstlineno____doc__r%   r)   r.   r5   r_   __static_attributes____classdictcell__)r   s   @r   r   r      s2     7 7' ',
,8 8r   r   c                     a a ] tR t^gt oRtRRRRR.3V3R lV 3R llltV3R lR	 ltV3R
 lR ltV3R lR ltV3R lR lt	V3R lR lt
V3R lR ltV3R lR ltV3R lR ltRV3R lR lltRV3R lR lltR tRtVtV ;t# ) FusionAttentionr   NFSkipLayerNormalizationLayerNormalizationc                b   < V ^8  d   QhRS[ RS[RS[RS[R,          RS[RS[RS[S[,          /# )	r   r   hidden_size	num_headsattention_maskNuse_multi_head_attention!disable_multi_head_attention_biassearch_op_types)r   intr   boollistr;   )r   r   s   "r   r   FusionAttention.__annotate__l   sZ     % %% % 	%
 &,% #'% ,0% c%r   c                   < V'       d   R MRp\         S	V `  WV4       W n        W0n        V'       d   TM
\	        V4      V n        WPn        W`n        RV n        RV n	        RV n
        RV n        RV n        R# )MultiHeadAttention	AttentionNT)superr%   rm   rn   r   ro   rp   rq   mask_filter_valuenum_heads_warninghidden_size_warningshape_infershape_infer_done)
r$   r   rm   rn   ro   rp   rq   rr   attention_op_name	__class__s
   &&&&&&&& r   r%   FusionAttention.__init__l   st     5M0R]?C&"0>nMRWDX(@%1R.!% "&#'  $r   c                <   < V ^8  d   QhRS[ RS[S[S[3,          /# )r   concatr9   r	   tuplers   )r   r   s   "r   r   rv      s&     0 0	 0eTWY\T\o 0r   c                   \        VP                  4      ^8X  d   V P                  P                  VP                  ^,          4      pV P                  P                  VP                  ^,          4      p\	        V\
        P                  4      '       dc   VP                  ^8X  dR   \	        V\
        P                  4      '       d2   VP                  ^8X  d!   V^ ,          V^ ,          V^ ,          ,          3# V P                  V P                  3# )a  
Detect num_heads and hidden_size from Concat node in the following subgraph:

SkipLayerNormalization or EmbedLayerNormalization
                /        |
             MatMul    Shape
                |        |
               Add     Gather(indices=0)
                |        |
                |      Unsqueeze
                |        |
                |     Concat (*, -1, 12, 64)
                |     /
               Reshape
                  |
               Transpose
)
r2   inputr   get_constant_value
isinstancenpndarraysizern   rm   )r$   r   rn   	head_sizes   &&  r   )get_num_heads_and_hidden_size_from_concat9FusionAttention.get_num_heads_and_hidden_size_from_concat   s    $ v||!

55fll1oFI

55fll1oFI9bjj11NNa'y"**55NNa' |Yq\IaL%@@@~~t////r   c                <   < V ^8  d   QhRS[ RS[S[S[3,          /# )r   	reshape_qr9   r   )r   r   s   "r   r   rv      s$     ,& ,&y ,&U3PS8_ ,&r   c                   V P                   P                  VP                  ^,          4      pVf   V P                   P                  V^4      pVe#   VP                  R8X  d   V P                  V4      # \        P                  RVP                  ^,          4       V P                  V P                  3# \        V\        P                  4      '       d-   \        V4      ^8w  g   V^,          ^ 8:  g   V^,          ^ 8:  d/   \        P                  RV4       V P                  V P                  3# V^,          pV^,          pWE,          pV P                  ^ 8  dK   W@P                  8w  d;   V P                  '       d)   \        P                  RV P                  V4       RV n        V P                  ^ 8  dK   W`P                  8w  d;   V P                   '       d)   \        P                  RV P                  V4       RV n        WF3# )zDetect num_heads and hidden_size from a reshape node.

Args:
    reshape_q (NodeProto): reshape node for Q

Returns:
    Tuple[int, int]: num_heads and hidden_size
Concatz%s is not initializer.zGq_shape_value=%s. Expected value are like [0, 0, num_heads, head_size].z>--num_heads is %d. Detected value is %d. Using detected value.Fz@--hidden_size is %d. Detected value is %d. Using detected value.)r   r   r   
get_parentop_typer   loggerdebugrn   rm   r   r   r   r2   r|   warningr}   )r$   r   q_shape_valuer   rn   r   rm   s   &&     r   get_num_heads_and_hidden_size-FusionAttention.get_num_heads_and_hidden_size   s    

55iooa6HI ZZ**9a8F!fnn&@EEfMMLL19??13EF>>4#3#333 M2::66=!Q&a A%q)9Q)>LLbdqr>>4#3#333!!$	!!$	+>>A)~~"=%%%TVZVdVdfo */&aK3C3C$C'''VX\XhXhju ,1(%%r   c                    < V ^8  d   QhRS[ /# r   add_qk)r	   )r   r   s   "r   r   rv      s      Y r   c                   V P                   '       g)   V P                  P                  R R7      V n        R V n         V P                  f   R# V P                  P	                  VP
                  ^ ,          4      pV P                  P	                  VP
                  ^,          4      pVe   Vf   \        P                  RV4       R# W#8w  d   \        P                  RV4       R# VP
                  ^,          # )T)updateNzone of the inputs of %s is Nonez)the shape of two inputs of %s is not same)r   r   infer_runtime_shaper~   get_edge_shaper   r   r   )r$   r   input_0_shapeinput_1_shapes   &&  r   get_add_qk_strFusionAttention.get_add_qk_str   s    $$$#zz==T=JD$(D!#((77QH((77QH M$9LL:FC)LLDfM||Ar   c                    < V ^8  d   QhRS[ /# r   r:   )r   r   s   "r   r   rv      s        S  r   c                  a VR ,           o\        \        V3R lV P                  4      4      p\        V4      ^8X  d   S# \        V4      ^ 8X  g   Q hV P                  P                  R4      p\        P                  ! R\        V P                  4       Uu. uF  qANK  	  upS.V^R7      pV P                  P                  V4       V P                  V P                  V&   S# u upi )_maskc                 0   < V P                   ^ ,          S8H  # r1   )output)nodemask_output_names   &r   <lambda>0FusionAttention.reshape_add_qk.<locals>.<lambda>   s    t{{1~AQ/Qr   r   r@   rA   rB   axis)ru   filternodes_to_addr2   r   rO   r   rP   rangern   appendthis_graph_namenode_name_to_graph_name)r$   r   concat_nodeconcat_node_name_concat_add_qk_fp32r   s   &&    @r   reshape_add_qkFusionAttention.reshape_add_qk   s     "G+ 6"QSWSdSdef{q ##;1$$$::66x@#--$)$..$9:$9qF$9:%&!
 	  !349=9M9M$$%56 ;s   
C!
c                ,   < V ^8  d   QhRS[ RS[ RS[ /# )r   past_kpast_vr9   r:   )r   r   s   "r   r   rv     s"     5 5 5S 5S 5r   c                Z   V P                   P                  R4      pV P                   P                  R4      pVR,           P                  RR4      pVR,           P                  RR4      p\        P                  ! RV.V.V^ .R7      p\        P                  ! RV.V.V^ .R7      pV P
                  P                  V4       V P
                  P                  V4       V P                  V P                  V&   V P                  V P                  V&   V P                   P                  R4      p	VP                  RR4      P                  RR4      P                  R	R
4      p
\        P                  ! RWV.V
.V	^ R7      pV P
                  P                  V4       V P                  V P                  V	&   V
# )zConcatenate past_k and past_v inputs to create past_kv input.

Args:
    past_k (str): name of past K value
    past_v (str): name of past V value

Returns:
    kv_output_name (str): name of past KV value
	Unsqueeze_5d.r   )r@   rA   rB   rC   r   z.valuez.kv_value_kvr   )	r   rO   replacer   rP   r   r   r   r   )r$   r   r   unsqueeze_k_nameunsqueeze_v_name	k_5d_name	v_5d_namek_5dv_5dr   kv_output_name	concat_kvs   &&&         r   r   FusionAttention.concat_kv  s     ::66{C::66{Ce^,,S#6	e^,,S#6	8K!
 8K!
 	  &  &9=9M9M$$%569=9M9M$$%56  ::66x@%8@@cJRRS[]bc$$)#$!
	 	  +9=9M9M$$%56r   c                ,   < V ^8  d   QhRS[ RS[ RS[ /# )r   present_k_namepresent_v_namekv_noder:   )r   r   s   "r   r   rv   8  s'     +K +Ks +KC +K# +Kr   c                   RRrTV P                   P                  V4      pV P                   P                  V4      pVfT   \        P                  ! \        P
                  ! ^ RR7      VR7      pV P                   P                  W`P                  4       VfT   \        P                  ! \        P
                  ! ^RR7      VR7      pV P                   P                  WpP                  4       V P                   P                  R4      pV P                   P                  R4      p	\        P                  ! RW4.V.V^ R7      p
\        P                  ! RW5.V.V	^ R7      pV P                  P                  V
4       V P                  P                  V4       V P                  V P                  V&   V P                  V P                  V	&   R# )	a  Split kv_node containing present KV values into separate present K and present V values.

Args:
    present_k_name (str): name of output to store present K value in
    present_v_name (str): name of output to store present V value in
    kv_node (str): name of present KV values
index_0index_1Nint64)dtype)rB   Gatherr   )r   rT   r   
from_arrayr   arrayrU   r   rO   r   rP   r   r   r   )r$   r   r   r   k_indexv_indexk_dimv_dimgather_k_namegather_v_name	present_k	present_vs   &&&&        r   split_kvFusionAttention.split_kv8  ss    %i

**73

**73= ++BHHQg,FWUEJJ&&u.B.BC= ++BHHQg,FWUEJJ&&u.B.BC 

33H=

33H=$$%#$
	 $$%#$
	 	  +  +6:6J6J$$]36:6J6J$$]3r   c                b   < V ^8  d   QhRS[ RS[ R,          RS[ R,          RS[RS[ R,          /# )r   q_addk_addNv_addname_prefixr9   )r	   r;   )r   r   s   "r   r   rv   e  sL       4 4	
  
T	r   c                   V P                   P                  VP                  ^,          4      ;'       g-    V P                   P                  VP                  ^ ,          4      p\        P                  ! V4      p\
        P                  ! V4      p\
        P                  ! V4      pVew   V P                   P                  VP                  ^,          4      ;'       g-    V P                   P                  VP                  ^ ,          4      p	\        P                  ! V	4      pVew   V P                   P                  VP                  ^,          4      ;'       g-    V P                   P                  VP                  ^ ,          4      p
\        P                  ! V
4      p\
        P                  ! WgV3^ R7      p^\
        P                  ! VP                  4      ,          pVR,           pV P                  VVP                  V.VR7       V# )   r   	_qkv_biasrB   rG   rH   rI   )r   rT   r   r   to_arrayr   
zeros_likestackprodshaperU   rG   )r$   r   r   r   r   q_biasqbkbvbk_biasv_biasqkv_biasqkv_bias_dim	bias_names   &&&&&         r   create_combined_qkv_bias(FusionAttention.create_combined_qkv_biase  s    ++EKKN;iitzz?Y?YZ_ZeZefgZh?i!!&)]]2]]2ZZ//A?mm4::C]C]^c^i^ijk^lCmF%%f-BZZ//A?mm4::C]C]^c^i^ijk^lCmF%%f-B88RRLq1277288,,+-	&&	 	 	
 r   c                z   < V ^8  d   QhRS[ RS[ RS[ RS[ RS[ R,          RS[ R,          RS[S[ S[ S[ 3,          /# )	r   q_matmulk_matmulv_matmulr   r   Nr   r9   )r	   r   )r   r   s   "r   r   rv     sp     M, M,M, M, 	M,
 M, 4M, 4M, 
y)Y.	/M,r   c                   V P                   P                  R4      pVP                  ^ ,          VP                  ^ ,          8X  d*   VP                  ^ ,          VP                  ^ ,          8X  g   Q hV P                   P                  VP                  ^,          4      pV P                   P                  VP                  ^,          4      p	V P                   P                  VP                  ^,          4      p
\        P
                  ! V4      p\        P
                  ! V	4      p\        P
                  ! V
4      pVP                  VP                  8X  d   VP                  VP                  8X  g   Q hVP                  ^ ,          p\        P                  ! WV3^R7      P                  V^V,          34      pVR,           pV P                  VVP                  VP                  ^ ,          VP                  ^,          .VR7       VR,           p\        P                  ! RVP                  ^ ,          V.V.VR7      pV P                  V P                  V&   V.pVR,           pV P                  V\         P"                  ^.^ .RR	7       VR
,           pV P                  V\         P"                  ^.V.RR	7       VR,           pV P                  V\         P"                  ^.^V,          .RR	7       VR,           pV P                  V\         P"                  ^.^V,          .RR	7       VR,           pV P                  V\         P"                  ^.R.RR	7       VR,           p\        P                  ! RVVVV.V.V P                   P                  R4      R7      pV P                  V P                  VP$                  &   VR,           p\        P                  ! RVVVV.V.V P                   P                  R4      R7      pV P                  V P                  VP$                  &   VR,           p\        P                  ! RVVVV.V.V P                   P                  R4      R7      pV P                  V P                  VP$                  &   TpTp Tp!VP'                  VVV.4       V P(                  '       Ed   Ve   V P                   P                  VP                  ^,          4      '       d   ^M^ p"\        P*                  ! \        P
                  ! V P                   P                  VP                  V",          4      4      4      '       dM   VVP                  ^V",
          &   TpVP-                  V4       V P                  V P                  VP$                  &   Ve   V P                   P                  VP                  ^,          4      '       d   ^M^ p"\        P*                  ! \        P
                  ! V P                   P                  VP                  V",          4      4      4      '       dM   VVP                  ^V",
          &   Tp VP-                  V4       V P                  V P                  VP$                  &   Ve   V P                   P                  VP                  ^,          4      '       d   ^M^ p"\        P*                  ! \        P
                  ! V P                   P                  VP                  V",          4      4      4      '       dM   VVP                  ^V",
          &   Tp!VP-                  V4       V P                  V P                  VP$                  &   V P.                  P'                  V4       VV V!3# )a9  Create packed QKV MatMul node before MultiHeadAttention node.
   This is for the scenario where an Attention node should be created but cannot be created
   because past_key and past_value are separate inputs and not one concatenated input.

Args:
    q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
    k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size)
    v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size)
    q_add (NodeProto): name of Add from Q path
    k_add (NodeProto): name of Add from K path
    v_add (NodeProto): name of Add from V path

Returns:
     q_output (NodeProto): Slice node for Q
     k_output (NodeProto): Slice node for K
     v_output (NodeProto): Slice node for V
MatMulr   _qkv_weightr   _qkv_outr?   _q_start_indexFrF   _k_start_index_v_start_index_end_of_qkv_index_qkv_last_axis_q_outSlice_k_out_v_out)r   rO   r   rT   r   r   r   r   r   reshaperU   rG   r   rP   r   r   r
   rW   rB   rR   rq   anyr   r   )#r$   r  r  r  r   r   r   matmul_node_nameq_weightk_weightv_weightqwkwvwd
qkv_weightqkv_weight_nameqkv_matmul_output
qkv_matmul	qkv_nodesq_slice_namek_slice_namev_slice_nameend_of_qkv_nameqkv_last_axis_nameq_slice_outputq_slicek_slice_outputk_slicev_slice_outputv_sliceq_outputk_outputv_outputinitializer_inputs#   &&&&&&&                            r   create_packed_qkv_matmul_node-FusionAttention.create_packed_qkv_matmul_node  s7   4  ::66x@ ~~a HNN1$55(..:Kx~~^_O`:``` ::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@!!(+!!(+!!(+xx288#BHH(<<<HHQKXXrrl3;;QAJG
*]: ((""1%z'7'7':;	 	 	
" -z9%%NN1%7&'!	

 :>9M9M$$%56L	 (*::,+:K:KSTRU]^\_ejk'*::,+:K:KSTRU]^\_ejk'*::,+:K:KSTRU]^ab]b\cino*-@@/[=N=NVWUX`ade`e_flqr-0@@"4@Q@QYZX[cebflqr)H4""%|\CUV#$,,W5	
 6:5I5I$$W\\2)H4""%|\CUV#$,,W5	
 6:5I5I$$W\\2)H4""%|_FXY#$,,W5	
 6:5I5I$$W\\2'7G45111 )-)C)CEKKPQN)S)SAYZ!66+..tzz/I/I%++VgJh/ijkk9GEKK$5 56$H$$U+?C?S?SD00< )-)C)CEKKPQN)S)SAYZ!66+..tzz/I/I%++VgJh/ijkk9GEKK$5 56$H$$U+?C?S?SD00< )-)C)CEKKPQN)S)SAYZ!66+..tzz/I/I%++VgJh/ijkk9GEKK$5 56$H$$U+?C?S?SD00< 	  +8++r   c          %         < V ^8  d   QhRS[ RS[ S[,          R,          RS[ S[,          R,          RS[ RS[ R,          RS[ R,          RS[R	S[R
S[RS[RS[RS[RS[RS[RS[RS[RS[RS[ R,          /# )r   r  r  Nr  r   r   r   rn   rm   r   key_padding_maskr   unidirectionalr   r   r   r   
packed_qkvr9   )r	   r;   rs   rt   )r   r   s   "r   r   rv     s     w ww c/D(w c/D(	w
 w 4w 4w w w w w w w w w  !w" #w$ %w& 
T	'wr   c                ~   V^ 8  g   Q hV^ 8  d&   W,          ^ 8w  d   \         P                  RW4       R# V P                  P                  4       P                   Uu0 uF  pVP
                  kK  	  ppV P                  P                  R4      p. pV'       dd   V P                  VVVVVV4      w  pppVP                  VP                  ^ ,          VP                  ^ ,          VP                  ^ ,          .4       EMf\        V\        4      '       d   \        V\        4      '       d   V P                  '       dI   VP                  VP                  ^ ,          VP                  ^ ,          VP                  ^ ,          .4       MVP                  VP                  ^ ,          VP                  ^ ,          VP                  ^ ,          .4       M\        V\        4      '       d   \        V\        4      '       dk   VV9   dd   VV9   d]   V P                  '       d&   VP                  VP                  ^ ,          W#.4       M'VP                  VP                  ^ ,          W#.4       MR# V P                  '       g&   V P                  WEVV4      pVP                  V4       MVP                  R4       V'       d   V'       d   VP                  WW.4       M"V
'       g	   V'       d   VP                  W.4       V	.pV'       d   V'       d   VP                  VV.4       \         P"                  ! RVVVR7      pRVn        VP&                  P                  \         P(                  ! RV4      4       V'       d:   VP&                  P                  \         P(                  ! R	\+        V4      4      4       V P-                  R4       V# u upi )
a  Create a MultiHeadAttention node.

Args:
    q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
    k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
    v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
    q_add (NodeProto): name of Add from Q path
    k_add (NodeProto): name of Add from K path
    v_add (NodeProto): name of Add from V path
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
    hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
    output (str): output name of MHA
    key_padding_mask (str): name of key padding mask
    add_qk (str): name of add after Q x K'
    unidirectional (bool): whether to apply causal attention mask automatically or not
    past_k (str): name of past K value - (batch_size, num_heads, past_sequence_length, head_size)
    past_v (str): name of past V value - (batch_size, num_heads, past_sequence_length, head_size)
    present_k (str): name of present K value - (batch_size, num_heads, sequence_length, head_size)
    present_v (str): name of present V value - (batch_size, num_heads, sequence_length, head_size)
    packed_qkv (bool): whether to combine MatMuls from Q, K, V paths
                       Note: This is for the scenario where an Attention node should be created but cannot be created
                       because past_key and past_value are separate inputs and not one concatenated input.

Returns:
    Union[NodeProto, None]: the node created or None if failed.
9input hidden size %d is not a multiple of num of heads %dNry    rx   r?   com.microsoftrn   r6  )r   r   r   graphr   rB   rO   r2  rR   r   r   r	   rq   r;   r   r   r   rP   domainrQ   rS   rs   increase_counter)r$   r  r  r  r   r   r   rn   rm   r   r5  r   r6  r   r   r   r   r7  r   graph_input_namesmha_node_name
mha_inputsr)  r+  r-  r   mha_outputsmha_nodes   &&&&&&&&&&&&&&&&&&          r   create_multihead_attention_node/FusionAttention.create_multihead_attention_node  s   ^ 1}}? 7A=LLTVam37::3C3C3E3K3KL3K4TYY3KL

33K@ 
(,(J(J)%GWg w~~a0'..2CW^^TUEVWX),,Hi1P1P555!!5<<?HOOA4FUV"XY!!8??1#5xq7I8??[\K]"^_x%%8S))----555!!5<<?H"GH!!8??1#5x"JK 55555eE=YIi(b! f/HI/89 h	956## 	
 *!!&"7"7Y"OP%%f&;&;<LcR`Na&bc23E Ms   N:c          '         < V ^8  d   QhRS[ R,          RS[RS[RS[RS[RS[RS[R	S[R
S[RS[ RS[ RS[ RS[RS[ RS[ RS[ RS[ RS[R,          RS[R,          /# )r   r-   Nr  r  r  r   r   r   rn   rm   first_inputr   
add_qk_strcausalr   r   r   r   scaler9   )r;   r	   rs   rt   float)r   r   s   "r   r   rv     s     h h$Jh h 	h
 h h h h h h h h h h h  !h" #h$ %h& t|'h( 
T	)hr   c                   V^ 8  g   Q hV	^ 8  d&   W,          ^ 8w  d   \         P                  RW4       R# RpVf   Vf   Vf   RpV P                  P                  VP                  ^,          4      pV P                  P                  VP                  ^,          4      pV P                  P                  VP                  ^,          4      pRRRpppV'       EdC   V P                  P                  VP                  ^,          4      ;'       g-    V P                  P                  VP                  ^ ,          4      pV P                  P                  VP                  ^,          4      ;'       g-    V P                  P                  VP                  ^ ,          4      pV P                  P                  VP                  ^,          4      ;'       g-    V P                  P                  VP                  ^ ,          4      pV'       d   V'       d   V'       d	   V'       g   R# Vf"   \        VP                  ^,           R24       R# \        P                  ! V4      p\        P                  ! V4      p\        P                  ! V4      pVP                  VP                  8X  g   Q hVP                  ^ ,          pVP                  ^ ,          pVP                  ^ ,          pVTu;8X  d	   V8X  g   Q h Q hV	^ 8  d   V	V8w  d   \         P                  RV	V4       Rp VP                  VP                  8w  d   Rp \        P                  ! VP                  R,          4      p!\        P                  ! VP                  R,          4      p"\        P                  ! VP                  R,          4      p#^ p$V '       d-   \        P                  ! VVV3^R7      p%V!V",           V#,           p$M$\        P                  ! VVV3^R7      p%^V!,          p$^ p&Rp'V'       Ed   \        P                  ! V4      p(\        P                  ! V4      p)\        P                  ! V4      p*\        P                  ! V(P                  4      p+\        P                  ! V)P                  4      p,\        P                  ! V*P                  4      p-V+T,u;8X  d	   V!8X  g   Q h Q hV-V#8X  g   Q hV '       d-   \        P                  ! V(V)V*3^ R7      p'V+V,,           V-,           p&M$\        P                  ! V(V)V*3^ R7      p'^V+,          p&V P                  P                  R	4      p.V P                  '       g2   V P!                  V.R
,           VP"                  V\%        V$4      .V%R7       V'       d1   V P!                  V.R,           VP"                  \%        V&4      .V'R7       V P                  '       d   V'       d   \         P                  R4       R# VP&                  ^ ,          VP&                  ^ ,          VP&                  ^ ,          V.R,           .p/Ve   V/P)                  V4       \*        P,                  ! RV/V.V.R7      p0V P/                  R4       EM@T
V.R
,           V'       d
   V.R,           MR.p/Ve   V/P)                  V4       MV/P)                  R4       T;'       d    Tp1V1'       d#   V P1                  W4      p2V/P)                  V24       V'       d+   V1'       g   V/P)                  R4       V/P)                  V4       V.p3V'       d_   V'       dW   VP3                  RR4      P3                  RR4      P3                  RR4      p4V3P)                  V44       V P5                  VVV44       \*        P,                  ! R	V/V3V.R7      p0V P/                  R	4       RV0n        V0P8                  P;                  \*        P<                  ! RV4      .4       V'       d2   V0P8                  P;                  \*        P<                  ! R^4      .4       Ve2   V0P8                  P;                  \*        P<                  ! RV4      .4       V '       d5   V0P8                  P;                  \*        P<                  ! RV!V"V#.4      .4       V P>                  eE   V0P8                  P;                  \*        P<                  ! R\A        V P>                  4      4      .4       V0# )a  Create an Attention node.

Args:
    mask_index (str | None): mask input
    q_matmul (NodeProto): MatMul node in fully connection for Q
    k_matmul (NodeProto): MatMul node in fully connection for K
    v_matmul (NodeProto): MatMul node in fully connection for V
    q_add (NodeProto): Add bias node in fully connection for Q
    k_add (NodeProto): Add bias node in fully connection for K
    v_add (NodeProto): Add bias node in fully connection for V
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
    hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
    first_input (str): first input name
    output (str): output name
    add_qk_str (str): name of Add node after Q x K'
    causal: whether it is uni-directional mask.
    past_k (str): name of input for past K value
    past_v (str): name of input for past V value
    present_k (str): name of output to store present K value
    present_v (str): name of output to store present V value
    scale: scale before softmax

Returns:
    Union[NodeProto, None]: the node created or None if failed.
r9  NTFzl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (%d) is not same as weight matrix dimension of q,k,v (%d). Please provide a correct input hidden size or pass in 0:r   NNr   ry   r  r   r   zVMultiHeadAttention does not support relative_position_bias: cannot fuse the attention.rx   r?   r:  z.key_keyr   r   r;  rn   r6  rJ  qkv_hidden_sizesr{   )!r   r   r   rT   r   printr   r   r   r   r   r   concatenater   rO   rp   rU   rG   rs   r   r   r   rP   r>  r   r   r   r=  rQ   rR   rS   r{   rK  )5r$   r-   r  r  r  r   r   r   rn   rm   rG  r   rH  rI  r   r   r   r   rJ  has_biasr  r  r  r   r   r   r  r  r  
qw_in_size
kw_in_size
vw_in_sizeis_qkv_diff_dimsqw_out_sizekw_out_sizevw_out_sizeqkv_weight_dimr  r   r   r   r   r   q_bias_shapek_bias_shapev_bias_shapeattention_node_nameattention_inputsattention_nodepast_existspast_kvattention_outputs
present_kvs5   &&&&&&&&&&&&&&&&&&&                                  r   create_attention_node%FusionAttention.create_attention_node  s(   \ 1}}? 7A=LLTVam=U]u}H::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@!%tT8ZZ//A?mm4::C]C]^c^i^ijk^lCmFZZ//A?mm4::C]C]^c^i^ijk^lCmFZZ//A?mm4::C]C]^c^i^ijk^lCmFf>>!$% &g g !!(+!!(+!!(+ xx288###XXa[
XXa[
XXa[
Z5:55555?{j8NNJ	 !88rxx#
 ggbhhrl+ggbhhrl+ggbhhrl+R1=J(;6DN2r2,Q7J_N&*8%%f-B%%f-B%%f-B77288,L77288,L77288,L<>;>>>>>;...>>2r2,Q?+l:\I88RRLq9 </"jj99+F,,,  (=8",, #n"56	 !    (;6 **,'(	 !  (((uv """#k1	  % ''
3#--$'(	N !!"67 #m35=#k12 
 % ''
3 ''+ ++VK..8 ''0"$++B/ ''
3!'Y&..vr:BB62NVVWZ\_`
!((4iJ?#--')(	N !!+. /  '')>)>{I)V(WX$$++V-B-BCSUV-W,XY$$++V-B-B7E-R,ST$$++&&'9KVa;bcd !!-$$++V-B-BCVX]^b^t^tXu-v,wxr   c                   TpTpVP                   R 8X  d'   V P                  P                  VR^ 4      pVe   TpMR# V P                  P                  V. RO. RO4      pRpVe   Vw   rrM-V P                  P                  V. R O. R!O4      pVe   Vw  rrMR# . p\	        VP
                  4       F>  w  rW9  d   K  W^ ,          P                  ^ ,          8X  d   K-  VP                  V4       K@  	  \        V4      ^8w  d   R# V^ ,          pV P                  P                  VR^ 4      pVe   VVP                  ^ ,          ,          pVeA   \        V4      ^8X  d1   V^,          pVP                   R 8X  d   VP                  ^ ,          pMuR# Ve%   \        V4      ^8X  d   VP                  ^ ,          pMKR# VP                   R 8X  d9   VV,          pV F)  pVP                   R 8X  g   K  VP                  ^ ,          pK+  	  VV,          pVP                   R8X  d.   \        VP                  4      ^8X  d   VP                  ^ ,          pVV,          pV Uu. uF  pVP                   NK  	  ppVP                  R4      ^8w  d   R# V P                  P                  V. R"O. R#O4      pVf   \        P                  R4       R# Vw   p	ppRpRpRpRpR	. R$O. R%O3R
. R&O. R%O3R. R'O. R(O3R. R)O. R*O3R. R+O. R,O3R. R-O. R.O3/pRp VP                  4        F^  w  p!p"V P                  P                  VV"^ ,          V"^,          4      p V f   K7  V!R8X  d   RpMV!R8X  d   RpMV!R8X  d   RpM	V!R8X  d   Rp M	  V f   \        P                  R4       R# Rp#Rp$Rp%Rp&V'       d	   V w  p	p%p$p	M7V'       d	   V w  p	p#p%p$M'V'       d   V w   p	p$MV'       d
   V w  p	p#p$p&p	MV w  p	p#p	p$T&;'       g    T$p&V P                  P                  V&. R"O. R/O4      p'V'f=   V P                  P                  V&. R0O. R1O4      p'V'f   \        P                  R4       R# V'R2,          p(V'R3,          p)V'R4,          p*T$p+V'       dB   V P                  P                  V$RR.^R.4      p,V,f   \        P                  R4       R# V,w  p+p	V P                  P                  T+. R"OV'       d   ^ M^^ ^ R.4      p-V-f=   V P                  P                  V$. R5O. R6O4      p-V-f   \        P                  R4       R# V-R3,          p.V-R4,          p/Rp0Rp1V'       d8   V P                  P                  V%. R7O. R,O3. R8O. R,O3. R9O. R:O3.V4      w  p	p0p	MV'       db   V P                  P                  V%. R;O. R:O3. R8O. R,O3.V4      w  p	p0p	V#e.   V P                  V#4      p1V1f   \        P                  RV#4       R# MFV'       d   M=V P                  P                  V#. R<O. R=O3. R>O. R?O3. R@O. RAO3. RBO. RCO3.V4      w  p	p0p	V'       g   V0f   \        P                  R4       R# V'       g   \        V04      ^8  d   V P                  P                  V0^ ,          4      w  p	p2V2eG   \!        V2\"        P$                  4      '       d'   V2P&                  ^8X  d   V2P)                  4       ^ 8  d   R# V2P)                  4       RD8w  d   V2P)                  4       V n        VP
                  ^ ,          V8X  Ed   V*P
                  ^ ,          V8X  Ed   V/P
                  ^ ,          V8X  Ed   V'       g4   V P,                  P/                  V0R4,          P
                  ^ ,          4      MRp3Vf   X
MTp4V P1                  V(4      w  p5p6V5^ 8:  g   V6^ 8:  d   \        P3                  R4       R# V P5                  V3V*V/VV)V.VV5V6VV4P                  ^ ,          V1R7      p7V7f   R# V P6                  P                  V74       V P8                  V P:                  V7P<                  &   Ve   VP
                  ^ ,          p8RV8,           p9V P?                  RV8,           \@        PB                  ^.^ ^ V5\E        V6V5,          4      .RR7      p:V P                  PG                  \H        PJ                  ! RV4P                  ^ ,          V:P<                  .V9.RV8,           4      V P8                  4       V9VP
                  ^ &   V PL                  PO                  V4W.4       V PL                  PO                  V 4       V PL                  PO                  V PP                  '       g   T'MV'RR4 4       V PL                  PO                  V PP                  '       g   T-MV-RR4 4       V PL                  PO                  V PP                  '       g   TMVRR4 4       RV n)        R# R# R# R# u upi )Erk   AddNr  ReshapeMulrj   z&fuse_attention: failed to match v pathFpath1path2path3path4path5sdpaSqrtTz'fuse_attention: failed to match qk pathz&fuse_attention: failed to match q pathz/fuse_attention: failed to match mul sqrt q pathz&fuse_attention: failed to match k pathr:  z6fuse_attention: failed to verify shape inference of %sz)fuse_attention: failed to match mask pathzmFailed to detect num_heads and hidden_size for Attention fusion. Please specify those parameters in argument.)r-   r  r  r  r   r   r   rn   rm   rG  r   rH  edge_modified_shape_modified_tensorrF   reshape_modified_)rg  r  rh  	Transposer  )NNr   r   r   )rg  Einsumrt  r  )r   Nr   r   )rt  rh  rg  r  )r   r   r   N)Softmaxrg  Divr  )r   r   Nr   )rv  rg  ri  r  )rv  Wherer  rw  )r   r   r   r   )rv  rg  rx  r  )r   r   r   r   )rv  rw  r  )r   r   r   )rv  rg  r  ri  rp  )r   r   Nr   r   )r   r   r   N)rw  rt  rh  rg  r  )r   r   r   r   Nr  )rt  rt  rh  rg  r  )r   r   r   r   N)Expandrh  Equal)r|  r   r   )Castr{  rh  r|  )r   r   r   r   )r}  r|  r   r   )ri  Subr}  r   r   )Nr   r   r   r   )ri  r~  r   r   )Nr   r   r   )rx  r}  r~  r{  r   r   )Nr   r   r   r   r   )rx  r}  r~  r}  r{  r   r   )Nr   r   r   r   r   r   i)*r   r   match_parentmatch_parent_path	enumerater   r   r   r2   countr   r   itemsmatch_parent_pathsr   get_constant_inputr   r   r   r   itemr{   ro   r_   r   r   rd  r   r   r   rB   rU   r
   rW   rs   rX   r   rP   nodes_to_removerR   rp   prune_graph);r$   r   input_name_to_nodesoutput_name_to_nodenormalize_node
start_nodeadd_before_layernormr"  einsum_noder   reshape_qkvtranspose_qkv
matmul_qkvother_inputs_i
node_input
root_inputmul_before_layernormmul_childrenlayernorm_nodechildrenchildparent_nodechildren_typesv_nodesadd_vmatmul_v
is_distillis_distill_addis_no_mask_attentionis_sdpaqk_pathsqk_nodeskvr   	matmul_qkwhere_qkafter_qq_nodesr   add_qmatmul_qafter_kmul_k_nodesk_nodesadd_kmatmul_k
mask_nodesrH  mul_valr-   attention_last_nodeq_num_headsq_hidden_sizenew_nodeunique_indexnew_edgeshape_tensors;   &&&&                                                       r   fuseFusionAttention.fusev  s	    #
!!%99#'::#:#:>5RS#T #/1
 JJ00?!
	
  =F:Qz 

44DoI $>G;
'
(8(89NB4q\0033
+ : |!!!_
  $zz66z5!L+./C/J/J1/MNL'C,=,B!-a!))-AA!/!6!6q!9J)c,.?1.D188;
##';;*:6H!==$88!&aJ " **5"::s;CUCU?VZ[?[$++A.J&z25=>XE%--X>)Q.**..z;dfuv?LLAB")Auh
$9?K9?K;\J;\J2I>@BTU
 NN$DAqzz33J!adKHG|!
g!%g'+$f % LLBC	*2'Q)Q/7,Q)! (Q919.Q	7A(0%Q9&&Y**..w8acrs?jj22@"G
 EFBK	2;**66y5&/TUW[S\]K"NO&LWa**..>gSTVWYZ\`@a
 ?jj22F"G
 EF2; 

#zz<<3Y?8)D;\J
 $ Az1 #zz<<@,O8)D $ Az1 !!008
%LL!Y[ab!#zz<<EGYZ=OQShiY[st $
 Az1 $
(:LLDE#J!(;66z!}EJAw "7BJJ77GLLA<MLLNa'||~')0&>>!
*x~~a/@J/NS[SaSabcSdhrSrZn,,99*R.:N:Nq:QRtxJ1<1D+-)-)K)KI)V&Ka=A#5C  11%!!!%)&*11!4% 2 H $$X.:>:N:ND((7&*003+l:#330<?)//QS1L-MN  4   

##$$!,33A68I8IJ!
+l:	 (( (0!!!$  '')<m(XY  ''1   ''t7T7T7TZabeceZfg  ''t7T7T7TZabeceZfg  ''t7T7T7TZabeceZfg  $DI Ts/N*q ?s   g)ro   rq   rm   r}   r{   rn   r|   r  r~   r   rp   )r:  r:  Fr:  r:  r:  r:  F)r:  Fr:  r:  r:  r:  N)ra   rb   rc   rd   re   r%   r   r   r   r   r   r   r   r2  rD  rd  r  rf   rg   __classcell__)r   r   s   @@r   ri   ri   g   s      04).27&>@T%U% %40 0>,& ,&\ *   25 5n+K +KZ <M, M,`w wrh hTo$ o$r   ri   )loggingr   numpyr   fusion_baser   fusion_optionsr   fusion_utilsr   r   onnxr	   r
   r   r   
onnx_modelr   ra   r   r   ri    r   r   <module>r     sD   
    . 1 = =  	8	S Sl~$f ~$r   