+
    9i`                     x    ^ RI Ht ^ RIt^ RIHt ^ RIHt ^ RIH	t	H
t
Ht ^ RIHt ]! ]4      t ! R R]4      tR# )	    )	getLoggerN)Fusion)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModelc                      a a ] tR t^t oRtV3R lV 3R lltRV3R lR lltR tRV3R lR lltV3R	 lR
 lt	V3R lR lt
R tR tR tR tR tV3R lR ltR tR tRtVtV ;t# )FusionAttentionUnetz:
Fuse Attention subgraph of UNet into one Attention node.
c                >   < V ^8  d   QhRS[ RS[RS[RS[RS[RS[/# )   modelhidden_size	num_headsis_cross_attentionenable_packed_qkvenable_packed_kv)r	   intbool)format__classdict__s   "l/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/fusion_attention_unet.py__annotate__ FusionAttentionUnet.__annotate__   sG     ( (( ( 	(
 !(  ( (    c                   < \         ST `  TV'       d   V'       d   R MRR.4       W n        W0n        W@n        WPn        W`n        RV n        RV n        R# )	AttentionMultiHeadAttentionLayerNormalizationTN)	super__init__r   r   r   r   r   num_heads_warninghidden_size_warning)selfr   r   r   r   r   r   	__class__s   &&&&&&&r   r!   FusionAttentionUnet.__init__   sY     	-2CKI]!"	

 '""4 "3 0 "&#' r   c                ,   < V ^8  d   QhRS[ RS[RS[/# )r   	reshape_q	is_torch2return)r   r   r   )r   r   s   "r   r   r   2   s"      y T c r   c                   ^ pV'       d   V P                   P                  V^4      pV'       d   VP                  R8X  d   \        VP                  4      ^8X  ds   V P                   P                  VP                  ^,          4      p\        V\        P                  4      '       d'   \        VP                  4      ^.8X  d   \        V4      pMyV P                   P                  VP                  ^,          4      p\        V\        P                  4      '       d.   \        VP                  4      ^.8X  d   \        V^,          4      p\        V\        4      '       d
   V^ 8  d   V# ^ # )zDetect num_heads from a reshape node.

Args:
    reshape_q (NodeProto): reshape node for Q
    is_torch2 (bool): graph pattern is from PyTorch 2.*
Returns:
    int: num_heads, or 0 if not found
Concat)r   
get_parentop_typeleninputget_constant_value
isinstancenpndarraylistshaper   )r$   r(   r)   r   reshape_parentq_shape_values   &&&   r   get_num_heads!FusionAttentionUnet.get_num_heads2   s    	!ZZ229a@N."8"8H"D^MaMaIbfgIg JJ99.:N:Nq:QR	i44ioo9NSTRU9U #II !JJ99)//!:LMM-44m>Q>Q9RWXVY9Ya 01	i%%)a-r   c                    V P                   P                  VP                  ^,          4      pV'       d(   \        P                  ! V4      P
                  ^ ,          # ^ # )zDetect hidden_size from LayerNormalization node.
Args:
    layernorm_node (NodeProto): LayerNormalization node before Q, K and V
Returns:
    int: hidden_size, or 0 if not found
)r   get_initializerr0   r   to_arrayr6   )r$   layernorm_nodelayernorm_biass   && r   get_hidden_size#FusionAttentionUnet.get_hidden_sizeN   sE     33N4H4H4KL''7==a@@r   c          
      H   < V ^8  d   QhRS[ RS[ RS[RS[S[S[3,          /# )r   r(   r>   r)   r*   )r   r   tupler   )r   r   s   "r   r   r   [   s5      &  &" &4= &JN &	sCx &r   c                   V P                  W4      pV^ 8:  d   V P                  pV P                  ^ 8  dP   W@P                  8w  d@   V P                  '       d.   \        P	                  RV P                   RV R24       RV n        V P                  V4      pV^ 8:  d   V P                  pV P                  ^ 8  dP   WPP                  8w  d@   V P                  '       d.   \        P	                  RV P                   RV R24       RV n        WE3# )a  Detect num_heads and hidden_size.

Args:
    reshape_q (NodeProto): reshape node for Q
    is_torch2 (bool): graph pattern is from PyTorch 2.*
    layernorm_node (NodeProto): LayerNormalization node before Q, K, V
Returns:
    Tuple[int, int]: num_heads and hidden_size
z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r9   r   r"   loggerwarningr@   r   r#   )r$   r(   r>   r)   r   r   s   &&&&  r   get_num_heads_and_hidden_size1FusionAttentionUnet.get_num_heads_and_hidden_size[   s     &&y<	>I>>A)~~"=%%%0@@TU^T__vwx).&**>:!**KaK3C3C$C''''(8(8'99Mk]Zqr ,1(%%r   c                X   < V ^8  d   QhRS[ RS[ RS[ RS[RS[RS[RS[RS[ R	,          /# )
r   q_matmulk_matmulv_matmulr   r   r0   outputr*   Nr   r   str)r   r   s   "r   r   r   }   sj     @ @@ @ 	@
 @ @ @ @ 
T	@r   c           
        V P                   '       * pV'       d   VP                  ^ ,          V8w  g1   VP                  ^ ,          V8w  g   VP                  ^ ,          V8w  dN   \        P                  RVP                  ^ ,          VP                  ^ ,          VP                  ^ ,          4       R# MVP                  ^ ,          V8w  gB   VP                  ^ ,          VP                  ^ ,          8w  g   VP                  ^ ,          V8X  dN   \        P                  RVP                  ^ ,          VP                  ^ ,          VP                  ^ ,          4       R# V^ 8  d+   WT,          ^ 8w  d   \        P                  RV RV 24       R# V P                  P                  VP                  ^,          4      p	V P                  P                  VP                  ^,          4      p
V P                  P                  VP                  ^,          4      pV	'       d   V
'       d	   V'       g   R# V	P                  p\        P                  ! V	4      p\        P                  ! V
4      p\        P                  ! V4      p\        P                  RVP                   RVP                   RVP                   R	V 24       V'       Ed$   VP                  VP                  8w  g   VP                  VP                  8w  d   R# VP                  ^ ,          pV^ 8  d   VV8w  d   \        R
V RV R24      h\        \        P                  ! VP                  R,          4      4      pV P                  '       Ed   V P                  P                  R4      pTpTpVV,          p\        P                   ! VP#                  VVV4      VP#                  VVV4      VP#                  VVV4      .4      P#                  VV^,          V,          4      pV P                  P                  RRR7      pV P%                  VR,           VVP                  ^ ,          VP                  ^,          .VR7       \&        P(                  ! RVP                  ^ ,          VR,           .VR,           .VR7      pV P*                  V P,                  VP.                  &   V P%                  VR,           \0        P2                  ^.^ ^ V^V.RR7       \&        P(                  ! RVR,           VR,           .VR,           .VR,           R7      pV P*                  V P,                  VP.                  &   V P4                  P7                  VV.4       V P8                  P7                  WV.4       EM\        P:                  ! WV3^R7      p^V,          pV P                  P                  R4      pV P%                  VR,           VVV.VR7       EMV P                  P                  R4      pV P<                  '       Ed}   VP                  VP                  8w  d   R# VP                  ^ ,          pVP                  ^ ,          pVV8X  g   Q hVP                  ^,          pVP                  ^,          pVP                  ^,          pVV8X  d   VV8X  g   Q hTpTpVV,          p\        P                   ! VP#                  VVV4      VP#                  VVV4      .4      P#                  VV^,          V,          4      pV P                  P                  RRR7      pV P%                  VR,           VVP                  ^ ,          VP                  ^,          .VR7       \&        P(                  ! RVP                  ^ ,          VR,           .VR,           .VR7      pV P*                  V P,                  VP.                  &   V P%                  VR,           \0        P2                  ^.^ ^ V^V.RR7       \&        P(                  ! RVR,           VR,           .VR ,           .VR,           R7      pV P*                  V P,                  VP.                  &   V P4                  P7                  VV.4       V P8                  P7                  W#.4       \        P>                  ! ^V.\        P@                  R!7      p ^V,          p!V P%                  VR",           VV!.V R7       V'       d2   V P                  '       g   VVR,           VR",           .p"MzVR,           .p"MoV P<                  '       gB   VPB                  ^ ,          VPB                  ^ ,          VPB                  ^ ,          VR",           .p"MVPB                  ^ ,          VR ,           .p"\&        P(                  ! V'       d   V P                  '       g   RMRV"V.VR7      p#R#V#n"        V#PF                  P7                  \&        PH                  ! R$V4      .4       V'       d   V P                  '       g   R%M8R&PK                  V P                  '       d   R'MV P<                  '       d   R(MR)4      p$V PM                  V$4       V## )*=  Create an Attention node.

Args:
    q_matmul (NodeProto): MatMul node in fully connection for Q
    k_matmul (NodeProto): MatMul node in fully connection for K
    v_matmul (NodeProto): MatMul node in fully connection for V
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
    hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
    input (str): input name
    output (str): output name

Returns:
    Union[NodeProto, None]: the node created or None if failed.
RFor self attention, input hidden state for q and k/v shall be same. Got %s, %s, %sNXFor cross attention, input hidden state for q and k/v shall be different. Got %s, %s, %sinput hidden size # is not a multiple of num of heads qw= kw= vw= hidden_size=Input hidden size (,) is not same as weight dimension of q,k,v (:). Please provide a correct input hidden size or pass in 0   NNr   MatMul
MatMul_QKVname_prefix_weightname	data_typedimsvals_outinputsoutputsre   _reshape_shapeFre   rf   rg   rh   rawReshape
_qkv_input_reshape)axisr   _qkv_weight	MatMul_KV	_kv_inputdtype	_qkv_biascom.microsoftr   Attention (self attention)MultiHeadAttention ({})self attention with packed qkvcross attention with packed kvcross attention)'r   r0   rE   debugr   r<   rf   r   r=   r6   
ValueErrorr   r3   prodr   create_node_namedstackreshapeadd_initializerr   	make_nodethis_graph_namenode_name_to_graph_namere   r   INT64nodes_to_addextendnodes_to_removestackr   zerosfloat32rM   domain	attributemake_attributer   increase_counter)%r$   rJ   rK   rL   r   r   r0   rM   is_self_attentionq_weightk_weightv_weight
float_typeqwkwvw
qw_in_sizeqw_out_sizeattention_node_namecnh
qkv_weightmatmul_node_namematmul_nodereshape_nodeqkv_weight_dim
kw_in_size
vw_in_sizekw_out_sizevw_out_size	kv_weightqkv_biasqkv_bias_dimattention_inputsattention_nodecounter_names%   &&&&&&&&                             r   create_attention_node)FusionAttentionUnet.create_attention_node}   s	   0 !% 7 77~~a E)X^^A->%-G8>>Z[K\`eKehNN1%NN1%NN1%	  Lf ~~a E)hnnQ.?8>>RSCT.TZbZhZhijZkotZtnNN1%NN1%NN1%	 ? 7A=LL-k]:]^g]hij::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@X( ''
!!(+!!(+!!(+s288*D
$rxxjk][\ xx288#rxx288';!JQ;*#< )+6bcmbn oN N  bggbhhrl34K%%%&*jj&A&ABV&W#9,  YY

1a(;RZZ1a=PRTR\R\]^`acdRe'fgooq1uqy
 $(::#>#>xUa#>#b $$)I5($**1-z/?/?/BC#	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=>$$),<<)//Q1a %   &//(61(+;; 1<?@)J6  CGBVBV,,\->->?!!((+|)DE$$++X,JK  XXrrl;
!"[&*jj&A&A+&N#$$,}<($n5#	 %  #'**"="=>R"S$$$88rxx'XXa[
XXa[
!Z/// hhqk hhqk hhqk"k1k[6PPP9, IIrzz!Q':BJJq!Q<O&PQYYZ[]^ab]bef]fg	#'::#>#>xU`#>#a $$)I5(#//!,iooa.@A"	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=>$$),<<)//Q1a %   &//(61(+;; 1;>?)J6  CGBVBV,,\->->?!!((+|)DE$$++X,@A 88Q,BJJ?;${2 	 	 	
 )))'-7'+5$  %8,$F#G (((OOA&OOA&OOA&'+5	$  OOA&'+5$ 
  ))-d6L6L6LKSg#H$	
 !0  '')>)>{I)V(WX !)?)?)? )*11))) 1 ((( 6& 	 	l+r   c                X   < V ^8  d   QhRS[ RS[ RS[ RS[RS[RS[RS[RS[ R	,          /# )
r   q_matmul_addk_matmul_addv_matmul_addr   r   r0   rM   r*   NrN   )r   r   s   "r   r   r     sj     T TT  T  	T
 T T T T 
T	Tr   c           
     p%   V P                   '       * pV P                  P                  VR^ 4      p	V P                  P                  VR^ 4      p
V P                  P                  VR^ 4      pV P                  V4      pVf   R# Vw  rV P                  V4      pVf   R# Vw  ppV P                  V4      pVf   R# Vw  ppV'       Ed-   V	P                  ^ ,          V8w  g1   V
P                  ^ ,          V8w  g   VP                  ^ ,          V8w  dN   \
        P                  RV	P                  ^ ,          V
P                  ^ ,          VP                  ^ ,          4       R# VP                  ^ ,          V8w  g1   VP                  ^ ,          V8w  g   VP                  ^ ,          V8w  dN   \
        P                  RVP                  ^ ,          VP                  ^ ,          VP                  ^ ,          4       R# EMLV	P                  ^ ,          V8w  gB   V
P                  ^ ,          VP                  ^ ,          8w  g   V
P                  ^ ,          V8X  dN   \
        P                  RV	P                  ^ ,          V
P                  ^ ,          VP                  ^ ,          4       R# VP                  ^ ,          V8w  gB   VP                  ^ ,          VP                  ^ ,          8w  g   V
P                  ^ ,          V8X  dN   \
        P                  RVP                  ^ ,          VP                  ^ ,          VP                  ^ ,          4       R# V^ 8  d+   WT,          ^ 8w  d   \
        P                  RV RV 24       R# V P                  P                  V	P                  ^,          4      pV P                  P                  V
P                  ^,          4      pV P                  P                  VP                  ^,          4      pV'       d   V'       d	   V'       g   R# VP                  ^
8X  d   \
        P                  R	4       R# \        P                  ! V4      p\        P                  ! V4      p\        P                  ! V4      p\
        P                  R
VP                   RVP                   RVP                   RV 24       V'       Ed   VP                  VP                  8w  g   VP                  VP                  8w  d   R# VP                  ^ ,          pV^ 8  d   VV8w  d   \        RV RV R24      h\        \        P                  ! VP                  R,          4      4      pV P                   '       Ed   V P                  P#                  R4      pTpTpVV,          p \        P$                  ! VP'                  VVV 4      VP'                  VVV 4      VP'                  VVV 4      .4      P'                  VV^,          V ,          4      p!V P                  P#                  RRR7      p"V P)                  V"R,           \*        P,                  V!P                  ^ ,          V!P                  ^,          .V!R7       \.        P0                  ! RV
P                  ^ ,          V"R,           .V"R,           .V"R7      p#V P2                  V P4                  V#P6                  &   VP6                  R,           p$V P)                  V$\*        P8                  ^.^ ^ VV .RR7       V P                  P#                  RRR7      p%\.        P0                  ! RVP:                  ^ ,          V$.V%R,           .V%R7      p&V P2                  V P4                  V&P6                  &   V P                  P#                  RRR7      p'\.        P0                  ! RVP:                  ^ ,          V$.V'R,           .V'R7      p(V P2                  V P4                  V(P6                  &   V P                  P#                  RRR7      p)\.        P0                  ! RVP:                  ^ ,          V$.V)R,           .V)R7      p*V P2                  V P4                  V*P6                  &   V P                  P#                  R R!R7      p+\.        P0                  ! R V&P:                  ^ ,          V(P:                  ^ ,          V*P:                  ^ ,          .V+R,           .V+R7      p,V,P<                  P?                  \.        P@                  ! R"^4      .4       V P2                  V P4                  V,P6                  &   V,P6                  R,           p-V P)                  V-\*        P8                  ^.^ ^ V^,          V ,          .RR7       V P                  P#                  RR#R7      p.\.        P0                  ! RV,P:                  ^ ,          V-.V.R,           .V.R7      p/V P2                  V P4                  V/P6                  &   V P                  P#                  R$R%R7      p0\.        P0                  ! R$V/P:                  ^ ,          V#P:                  ^ ,          .V0R,           .V0R7      p1V P2                  V P4                  V1P6                  &   V0R,           p2V P)                  V2\*        P8                  ^.^ ^ V^V .RR7       \.        P0                  ! RV1P:                  ^ ,          V2.VR&,           .V0R',           R7      p3V P2                  V P4                  V3P6                  &   V PB                  P?                  V#V&V(V*V,V/V1V3.4       V PD                  P?                  WWW#.4       EMR# V P                  P#                  R4      pV PF                  '       Ed   VP                  VP                  8w  d   R# VP                  ^ ,          p4VP                  ^ ,          p5V4V58X  g   Q hVP                  ^,          pVP                  ^,          p6VP                  ^,          p7VV78X  d   V6V78X  g   Q hT4pTpV6V,          p \        P$                  ! VP'                  VVV 4      VP'                  VVV 4      .4      P'                  VV^,          V ,          4      p8V P                  P#                  RR(R7      p"V P)                  V"R,           \*        P,                  V8P                  ^ ,          V8P                  ^,          .V8R7       \.        P0                  ! RV
P                  ^ ,          V"R,           .V"R,           .V"R7      p#V P2                  V P4                  V#P6                  &   VP6                  R,           p9V P)                  V9\*        P8                  ^.^ ^ VV .RR7       V P                  P#                  RRR7      p'\.        P0                  ! RVP:                  ^ ,          V9.V'R,           .V'R7      p(V P2                  V P4                  V(P6                  &   V P                  P#                  RRR7      p)\.        P0                  ! RVP:                  ^ ,          V9.V)R,           .V)R7      p*V P2                  V P4                  V*P6                  &   V P                  P#                  R R)R7      p:\.        P0                  ! R V(P:                  ^ ,          V*P:                  ^ ,          .V:R,           .V:R7      p;V;P<                  P?                  \.        P@                  ! R"^4      .4       V P2                  V P4                  V;P6                  &   V;P6                  R,           p<V P)                  V<\*        P8                  ^.^ ^ V^,          V ,          .RR7       V P                  P#                  RR*R7      p=\.        P0                  ! RV;P:                  ^ ,          V<.V=R,           .V=R7      p>V P2                  V P4                  V>P6                  &   V P                  P#                  R$R+R7      p?\.        P0                  ! R$V>P:                  ^ ,          V#P:                  ^ ,          .V?R,           .V?R7      p@V P2                  V P4                  V@P6                  &   V?R,           p2V P)                  V2\*        P8                  ^.^ ^ V^V .RR7       \.        P0                  ! RV@P:                  ^ ,          V2.VR,,           .V?R',           R7      p3V P2                  V P4                  V3P6                  &   V PB                  P?                  V#V(V*V;V>V@V3.4       V PD                  P?                  WW#.4       MR# \        PH                  ! ^V.\        PJ                  R-7      pA^V,          pBV P)                  VR.,           \*        P,                  VB.VAR7       V'       d    V P                   '       g   R# VR&,           .pCM0V PF                  '       g   R# VP:                  ^ ,          VR,,           .pC\.        P0                  ! V'       d   V P                   '       g   R/MRXCV.VR7      pDR0VDn&        VDP<                  P?                  \.        P@                  ! R1V4      .4       V'       d   V P                   '       g   R2M8R3PO                  V P                   '       d   R4MV PF                  '       d   R5MR64      pEV PQ                  VE4       XD# )7rQ   r_   NrR   z_For self attention, input hidden state for LoRA q and k/v weights shall be same. Got %s, %s, %srS   zeFor cross attention, input hidden state for LoRA q and k/v weights shall be different. Got %s, %s, %srT   rU   zBweights are in fp16. Please run fp16 conversion after optimizationrV   rW   rX   rY   rZ   r[   r\   r]   r   r`   ra   rc   rd   ri   rj   rm   Frn   rp   Reshape_LoRA_QReshape_LoRA_KReshape_LoRA_Vr,   Concat_LoRA_QKVrs   Reshape_LoRA_QKVAddAdd_Weights_QKVrq   rr   ru   Concat_LoRA_KVReshape_LoRA_KVAdd_Weights_KVrv   rw   ry   r   rz   r   r{   r|   r}   r~   r   ))r   r   match_parentmatch_lora_pathr0   rE   r   r<   rf   r   r=   r6   r   r   r3   r   r   r   r   r   r   r   FLOATr   r   r   r   re   r   rM   r   r   r   r   r   r   r   r   r   r   r   )Fr$   r   r   r   r   r   r0   rM   r   rJ   rK   rL   q_lora_nodesq_lora_last_nodeq_lora_matmul_1k_lora_nodesk_lora_last_nodek_lora_matmul_1v_lora_nodesv_lora_last_nodev_lora_matmul_1r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   lora_weight_shape_tensor_nameq_lora_reshape_node_nameq_lora_reshape_nodek_lora_reshape_node_namek_lora_reshape_nodev_lora_reshape_node_namev_lora_reshape_nodeqkv_lora_concat_node_nameqkv_lora_concat_node'reshaped_lora_weights_shape_tensor_nameqkv_lora_reshaped_node_nameqkv_lora_reshaped_nodeadd_weights_node_nameadd_weights_nodeshape_tensor_namer   r   r   r   r   r    kv_lora_weight_shape_tensor_namekv_lora_concat_node_namekv_lora_concat_node*reshaped_kv_lora_weights_shape_tensor_namekv_lora_reshaped_node_namekv_lora_reshaped_nodeadd_kv_weights_node_nameadd_kv_weights_noder   r   r   r   r   sF   &&&&&&&&                                                              r   create_attention_node_lora.FusionAttentionUnet.create_attention_node_lora  s;   0 !% 7 77::**<1E::**<1E::**<1E++L9.:+	++L9.:+	?++L9.:+	?~~a E)X^^A->%-G8>>Z[K\`eKehNN1%NN1%NN1%	   %%a(E1"((+u4"((+u4u#))!,#))!,#))!,	  5 ~~a E)hnnQ.?8>>RSCT.TZbZhZhijZkotZtnNN1%NN1%NN1%	   %%a(E1#))!,0E0Ea0HHNN1%.) $))!,#))!,#))!, ? 7A=LL-k]:]^g]hij::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@X( #LL]^!!(+!!(+!!(+s288*D
$rxxjk][\ xx288#rxx288';!JQ;*#< )+6bcmbn oN N  bggbhhrl34K%%%&*jj&A&ABV&W#9,  YY

1a(;RZZ1a=PRTR\R\]^`acdRe'fgooq1uqy
 $(::#>#>xUa#>#b $$)I5)//$**1-z/?/?/BC#	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=> 1A0E0EHX0X-$$6)//Q1 %  ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF -1JJ,G,G^o,G,p)'-'7'7+2215+2215+2215
 7?@2	($ %..55v7L7LVUV7W6XYJNJ^J^,,-A-F-FG ;O:S:SVf:f7$$@)//QA	* %  /3jj.I.I)as.I.t+)/)9)9077:<cd86AB4	*& MQL`L`,,-C-H-HI )-

(C(CEWh(C(i%#)#3#3299!<k>P>PQR>ST2V;<.	$  GKFZFZ,,-=-B-BC %:<L$L!$$*)//Q1a %   &//,33A68IJ0<?@.;	  CGBVBV,,\->->?!!((#+++,.($	 $$++XYe,tu "&**"="=>R"S$$$88rxx'XXa[
XXa[
!Z/// hhqk hhqk hhqk"k1k[6PPP9, IIrzz!Q':BJJq!Q<O&PQYYZ[]^ab]bef]fg	#'::#>#>xU`#>#a $$)I5)//#//!,iooa.@A"	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=> 4D3H3HK[3[0$$9)//Q1 %  ,0::+F+Fy^n+F+o(&,&6&6,33A68XY5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68XY5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fx]m+F+n(&,&6&6/66q9;N;U;UVW;XY5>?1	'# $--44f6K6KFTU6V5WXIMI]I],,-@-E-EF >Q=U=UXh=h:$$C)//QA	* %  .2ZZ-H-H`q-H-r*(.(8(8/66q9;ef7&@A3	)% LPK_K_,,-B-G-GH ,0::+F+FuZj+F+k(&,&6&6188;[=O=OPQ=RS5>?1	'# JNI]I],,-@-E-EF %=?O$O!$$*)//Q1a %   &///66q9;LM0;>?1J>	  CGBVBV,,\->->?!!((#+++-+$
 $$++X,\]  88Q,BJJ?;${2!''	 	 	
 )))$7,$F#G ((( !''*'+5$ 
  ))-d6L6L6LKSg#H$	
 !0  '')>)>{I)V(WX !)?)?)? )*11))) 1 ((( 6& 	 	l+r   c           
     B   V P                  WV4      '       d   R # V P                  P                  VR^ 4      pVf0   V P                  '       g   V P                  P                  VR^ 4      pVf   R # VP                  ^ ,          pW%,          pR pV F  pVP
                  R8X  g   K  Tp M	  Vf   R # V P                  WW4      ;'       g    V P                  WW4      p	V	e~   V	w  rrrpTpV P                  WV
4      w  ppV^ 8:  d   \        P                  R4       R # V P                  VVVVVVP                  ^ ,          VP                  ^ ,          R7      pVf   R # MV P                  WW4      ;'       g    V P                  WW4      p	V	f   R # V	w  rrpppTpV P                  WV
4      w  ppV^ 8:  d   \        P                  R4       R # V P                  VVVVVVP                  ^ ,          VP                  ^ ,          R7      pVf   R # V P                  WV
4      w  ppV^ 8:  d   \        P                  R4       R # V P                  P!                  V4       V P"                  V P$                  VP&                  &   V P(                  P+                  VV.4       RV n        R # )Nr   rp   *fuse_attention: failed to detect num_headsr0   rM   T)fuse_a1111_fp16r   r   r   rM   r.   match_qkv_torch1match_qkv_torch2rG   rE   r   r   match_qkv_torch1_loramatch_qkv_torch2_lorar   r   appendr   r   re   r   r   prune_graph)r$   normalize_nodeinput_name_to_nodesoutput_name_to_nodenode_before_layernorm
root_inputchildren_nodesskip_addnode	match_qkvr)   reshape_qkvtranspose_qkvr(   matmul_qmatmul_kmatmul_vattention_last_nodeq_num_headsq_hidden_sizenew_nodematmul_add_qmatmul_add_kmatmul_add_vs   &&&&                    r   fuseFusionAttentionUnet.fuseU  s   EXYY $

 7 7q Q !(1H1H1H$(JJ$;$;NIWX$Y! (*11!4
,8"D||u$ # ))*?nn4CXCXYcCn	 ]fZIMhRZ"-)-)K)KIgp)q&KaIJ 11$++A.*11!4 2 H    22:H  DLfLfMI  irfIMlLZf"-)-)K)KIgp)q&KaIJ 66$++A.*11!4 7 H )-)K)KIgp)q&KaIJ  *6:6J6J$$X]]3##%8-$HI  r   c           
        VP                   ^ ,          V8X  d   ^M^ pV P                  P                  V. ROVRR^ ^ ^ .4      pVf   R# Vw   rVrupV P                  P                  V. RO. R	O4      p	V	f   \        P	                  R4       R# V	w    rZV P                  P                  V. R
O. RO4      pVe   Vw  rpMBV P                  P                  V. RO. RO4      pVe   Vw  rrM\        P	                  R4       R# V P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw  ppppV P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw     ppRWgVVVV
3# )z.Match Q, K and V paths exported by PyTorch 1.*N&fuse_attention: failed to match v path'fuse_attention: failed to match qk path&fuse_attention: failed to match q path&fuse_attention: failed to match k pathF)r   r_   rp   	Transposerp   r_   rp   r  rp   r_   r^   r   r   r   SoftmaxMulr_   r   r   r   r  r   r  r_   r   r   r   r   )r  rp   r  rp   r_   r^   r   r   r   r   r0   r   match_parent_pathrE   r   )r$   r   r   another_input	qkv_nodes_r   r   
matmul_qkvv_nodesr   qk_nodes_softmax_qk_mul_qk	matmul_qk	_add_zeroq_nodes_transpose_qr(   r   k_nodesr   s   &&&                   r   r   $FusionAttentionUnet.match_qkv_torch1  s   %^^A.*<!JJ00JD$1a0
	 <E9AMj **..z;hjvw?LLAB%Aq:://
<XZcd08-[9zz33J@ceqrH#?G<)FG**..y:giuv?LLAB18.L)X**..QSb
 ?LLAB!(Aq!Xk)XxQYYYr   c           	     N   VP                   ^ ,          V8X  d   ^M^ pV P                  P                  V. R
OVRR^ ^ .4      pVf   R# Vw   rVrxV P                  P                  V. RO. RO4      p	V	f   \        P	                  R4       R# V	w   rZV P                  P                  VRR.^ ^ .4      pVe   Vw  rM\        P	                  R4       R# V P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw  ppppV P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw  p ppV P                  P                  V. RO. RO4      pVe   VR,          V8w  d   \        P	                  R4       R# R	WgVVVV
3# )z.Match Q, K and V paths exported by PyTorch 2.*r_   Nr
  r  r  r  r  z*fuse_attention: failed to match mul_q pathT)r   r_   rp   r  r_   )r  rp   r_   r^   r   r   )r  r  rp   r_   r   Nr   r   r^   Nr   r   SqrtDivr-  CastSliceShaper  rp   Nr   r^   r   r   r   r   r   r  )r$   r   r   r  r  r  r   r   r  r  r   r  r   r"  r$  mul_qr%  r(   r   r&  _mul_kr   mul_q_nodess   &&&                    r   r   $FusionAttentionUnet.match_qkv_torch2  s   %^^A.*<!JJ00?D$1-
	 9B6AM**..z;]_hi?LLAB"A:://
Y<QTUWXSYZ'/$[)LLBC**..y:cetu?LLAB5<2i**..y:cetu?LLAB#* Ax jj22U'

 +b/Y">LLEF[HhPXXXr   c                   VP                   ^ ,          V8X  d   ^M^ pV P                  P                  V. ROV^ RR^ ^ ^ .4      pVf   R# Vw    rVrupV P                  P                  V. RO. R	O4      p	V	f   \        P	                  R4       R# V	w    rZV P                  P                  V. R
O. RO4      pVe   Vw  rpMBV P                  P                  V. RO. RO4      pVe   Vw  rrM\        P	                  R4       R# V P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw  ppppV P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw     ppRWgVVVV
3# )zJMatch Q, K and V paths exported by PyTorch 1 that contains LoRA patterns.*N+fuse_attention: failed to match LoRA v path,fuse_attention: failed to match LoRA qk path+fuse_attention: failed to match LoRA q path+fuse_attention: failed to match LoRA k pathF)r   r   r_   rp   r  rp   r_   )rp   r  rp   r   r  r  r  r  r  )r  rp   r  rp   r   r  r  )r$   r   r   r  r  r  r   r   r  r  r  r  r   r!  r"  r#  r$  r%  r(   r  r&  r  s   &&&                   r   r   )FusionAttentionUnet.match_qkv_torch1_lora  s   %^^A.*<!JJ00QAtT1a3
	
 ?H<Aq} **..z;egst?LLFG")Aq:://
<XZcd08-[9zz33J@ceqrH#?G<)KL**..y:dfrs?LLFG5<2L)\**..NP_
 ?LLFG%,"Aq!\k)\<Yeeer   c           
     R   VP                   ^ ,          V8X  d   ^M^ pV P                  P                  V. R
OV^ RR^ ^ .4      pVf   R# Vw    rVrxV P                  P                  V. RO. RO4      p	V	f   \        P	                  R4       R# V	w   rZV P                  P                  VRR.^ ^ .4      pVe   Vw  rM\        P	                  R4       R# V P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw  ppppV P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw  p ppV P                  P                  V. RO. RO4      pVe   VR,          V8w  d   \        P	                  R4       R# R	WgVVVV
3# )zJMatch Q, K and V paths exported by PyTorch 2 that contains LoRA patterns.*r_   Nr9  r  r:  r;  r<  z/fuse_attention: failed to match LoRA mul_q pathT)r   r   r_   rp   r  r_   )r  rp   r   r)  )r  r  rp   r   r*  r+  r,  r2  r3  r  )r$   r   r   r  r  r  r   r   r  r  r  r  r   r"  r$  r4  r%  r(   r  r&  r5  r  r6  s   &&&                    r   r   )FusionAttentionUnet.match_qkv_torch2_loraD  s   %^^A.*<!JJ00FAtT1a0
	
 <E9Aq}**..z;Z\ef?LLFG&A:://
Y<QTUWXSYZ'/$[)LLGH**..y:`bqr?LLFG9@6i**..y:`bqr?LLFG'.$A| jj22U'

 +b/Y">LLJK[L,Xdddr   c                    < V ^8  d   QhRS[ /# )r   add_node)r   )r   r   s   "r   r   r   w  s     * **r   c                   V P                   P                  VR R .^^ .4      pVe   Vw  r4W43# V P                   P                  V. RO. RO4      pVe	   Vw  rVpWT3# V P                   P                  V. RO. RO4      pVe
   Vw  p rdWT3# R# )r_   N)r  r_   r_   r)  )r  r  r_   r_   r  )r   r  )r$   rA  
lora_nodeslora_matmul_2_nodelora_matmul_1_nodelora_mul_noder  s   &&     r   r   #FusionAttentionUnet.match_lora_pathw  s     ZZ11x F

 !7A4&;; ZZ11'

 !5?2]1!66 ZZ11.

 !8B5]Aq!66r   c           
        V P                   P                  VRR.^ ^ .4      pVf(   V P                   P                  VRR.^ ^ .4      pVf   R# Vw  rVVP                  ^ ,          pW',          pRp	V F  p
V
P                  R8X  g   K  T
p	 M	  V	f   R# V P	                  Wy4      pVf   R# Vw  ppppppV P                   P                  VR^ 4      pV P                   P                  VR^ 4      pV P                   P                  VR^ 4      pVe-   Ve)   V P                  '       g	   VV8X  d   MVV8w  d   VV8X  g   R# VP                  ^ ,          VP                  ^ ,          8w  d   R# TpV P                  VR4      ;'       g    V P                  VR4      pV^ 8:  d   \        P                  R4       R# V P                  V4      pV P                  VVVVVVP                  ^ ,          VP                  ^ ,          R7      pVf   R# V P                  P                  V4       V P                  V P                   VP"                  &   V P$                  P'                  VV.4       RV n        R# )	zPFuse attention of fp16 UNet exported in A1111 (stable diffusion webui) extensionr/  r   Nrp   FTr   r   )r   r  rM   r.   match_qkv_a1111r   r   r0   r9   rE   r   r@   r   r   r   r   r   re   r   r   r   )r$   r   r   r   
entry_path_castr   r   r   r   r   r   r   r   r(   r   r   r   cast_qcast_kcast_vr   r  r  r  s   &&&&                     r   r   #FusionAttentionUnet.fuse_a1111_fp16  sl   ZZ11.65/TUWXSYZ
55nvyFY\]_`[abJ!'1$*11!4
,8"D||u$ # ((>	 	
 ((61=((61=((61=")-)@)@)@6!fPVFV& <<?n33A66)((D9aaT=O=OPY[`=a!LLEF,,^< --..#&--a0 . 
   *6:6J6J$$X]]3##%8-$HI  r   c           
        VP                   ^ ,          V8X  d   ^M^ pV P                  P                  V. ROVRR^ ^ ^ .4      pVf   R# Vw   rVrxp	V P                  P                  V	. RO. RO4      p
V
f   \        P	                  R4       R# V
w    r[V P                  P                  V	. R	O. R
O4      pVe   Vw   r]r^M\        P	                  R4       R# V P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw  ppppV P                  P                  V. RO. RO4      pVf   \        P	                  R4       R# Vw    ppWgVVVV3# )zKMatch Q, K and V paths exported by A1111 (stable diffusion webui) extensionNr
  r  r  r  )r   r_   rp   r  rp   Einsumr  r  )r/  r/  r  r  rQ  )r   r   r   r   Nr  r  )r$   r   r   r  r  r  r   r   reshape_einsum
einsum_qkvr  r   r  r   	einsum_qkr$  r%  r(   r   r&  r   s   &&&                  r   rI  #FusionAttentionUnet.match_qkv_a1111  se   %^^A.*<!JJ00JD$1a0
	 IRFAM:**..z;hjvw?LLAB%Aq:://DFX
 08-Q9LLBC**..y:giuv?LLAB18.L)X**..y:giuv?LLAB%Aq(9h(RRr   )r   r   r   r#   r   r   r"   r   )F)__name__
__module____qualname____firstlineno____doc__r!   r9   r@   rG   r   r   r  r   r   r   r   r   r   rI  __static_attributes____classdictcell____classcell__)r%   r   s   @@r   r   r      s     ( (: 8 &  &D@ @DT TlX t/Zb2Yh.f`1ef* *XL\*S *Sr   r   )loggingr   numpyr3   fusion_baser   fusion_utilsr   onnxr   r   r   
onnx_modelr	   rV  rE   r    r   r   <module>re     s6   
    $ / /  	8	KS& KSr   