+
    9i=b                     |    ^ RI Ht ^ RIt^ RIHt ^ RIHt ^ RIH	t	H
t
HtHt ^ RIHt ]! ]4      t ! R R]4      tR# )	    )	getLoggerN)Fusion)FusionUtils)	NodeProtoTensorProtohelpernumpy_helper	OnnxModelc                     a a ] tR t^t oRtV3R lV 3R lltRV3R lR lltV3R lR ltV3R lR	 ltV3R
 lR lt	V3R lR lt
V3R lR ltV3R lR ltV3R lR ltV3R lR ltV3R lR ltV3R lR ltR tRtVtV ;t# )FusionMultiHeadAttentionMMDitzG
Fuse MultiHeadAttention for Multimodal Diffusion Transformer (MMDiT).
c                    < V ^8  d   QhRS[ /# )   modelr
   )format__classdict__s   "g/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/fusion_mha_mmdit.py__annotate__*FusionMultiHeadAttentionMMDit.__annotate__   s     ' 'i '    c                <   < \         SV `  VR R.R7       / V n        R# )MultiHeadAttentionSoftmax)fused_op_typesearch_op_typesN)super__init__unsqueeze_update_map)selfr   	__class__s   &&r   r   &FusionMultiHeadAttentionMMDit.__init__   s$    .BU^T_`$&!r   c                &   < V ^8  d   QhRS[ RS[/# )r   
start_nodereturn)r   int)r   r   s   "r   r   r      s      	 Z] r   c                `   V P                   P                  V. ROV^ ^.VR7      pVf   ^ # VR,          p\        VP                  4      ^8w  d   ^ # V P                   P	                  VP                  ^,          4      pVf   ^ # \        VP
                  4      ^8w  d   ^ # \        V^ ,          4      # )a8  
Detect num_heads from Reshape & Transpose of q/k/v for both Stable Diffusion 3.x and Flux 1.x:

        MatMul    .. [-1] [24] ..
         |        |  |  /   /
        Add     Concat(axis=0)
          |      /
          Reshape
             |
         Transpose(perm=0,1,3,2)
             |
       (start_node)
output_name_to_node)	TransposeReshapeConcat)r   match_parent_pathleninputget_constant_valueshaper%   )r   r#   r(   input_indexnodesconcat_shapevalues   &&&&   r   get_num_heads+FusionMultiHeadAttentionMMDit.get_num_heads   s     

,,:[!Q<Oex - 
 =Ry|!!"a'

--l.@.@.CD=u{{q 58}r   c                ,   < V ^8  d   QhRS[ RS[RS[/# )r   transpose_kconcat_before_transposer$   )r   boolr%   )r   r   s   "r   r   r   :   s$     / /	 /im /ru /r   c                (   V'       dH   V P                   P                  VRR.^ ^.VR7      pV'       d   V P                  V^,          V4      #  ^ # V P                   P                  VR.^ .VR7      pV'       d   V P                  V^ ,          V4      # ^ # )a  
        Detect num_heads from subgraph like the following (num_heads=24 in this example):
                       MatMu    .. [-1] [24] ..
                         |       |  |  /   /
                        Add     Concat
                          |      /
                         Reshape
                            |
                     Transpose(perm=0,2,1,3)
                            |
                     SimplifiedLayerNormalization
                            |
                    Transpose(perm=0,1,3,2)

        Another variant is to an extra Concat node to join two symmetrical subgraphs:

                   |              |
                  MatMul        MatMul   .. [-1] [24] ..
                   |              |       |  |  /   /
                  Add  Concat    Add      Concat
                    |  /          |      /
                  Reshape         Reshape
                    |              |
                 Transpose     Transpose(perm=0,2,1,3)
                    |              |
SimplifiedLayerNormalization  SimplifiedLayerNormalization
                        |     /
                       Concat
                         |
                    Transpose(perm=0,1,3,2)

            Both patterns are used in stable diffusion 3.5 model.
r+   SimplifiedLayerNormalizationr'   )r   r-   r6   )r   r9   r(   r:   r3   s   &&&& r   get_num_heads_from_k2FusionMultiHeadAttentionMMDit.get_num_heads_from_k:   s    D #JJ00h(FG!Qex 1 E ))%(4GHH   JJ00<=sXk 1 E ))%(4GHHr   c                ,   < V ^8  d   QhRS[ RS[ RS[ /# )r   
input_nameoutput_namer$   str)r   r   s   "r   r   r   k   s"     # # ## ## #r   c                   RpV P                   P                  V4      pVfV   \        P                  ! \        P
                  ! . RORR7      VR7      pV P                   P                  W@P                  4       \        P                  ! RW.V.V P                   P                  R4      R7      pV P                  P                  V4       V P                  V P                  VP                  &   VP                  ^ ,          # )zAdd a Reshape node to convert 4D BxSxNxH to 3D BxSxD.

Args:
    input_name (str): input name for the 4D tensor of shape BxSxNxH.
    output_name (str): output name for the 3D tensor of shape BxSxD, where D = N * H.

Returns:
    str: the output name
bsnh_to_bsd_reshape_dimsint64)dtype)namer*   inputsoutputsrI   )r   r   r,   )r   get_initializerr	   
from_arraynparrayadd_initializerthis_graph_namer   	make_nodecreate_node_namenodes_to_addappendnode_name_to_graph_namerI   output)r   rA   rB   new_dims_namenew_dims	reshape_qs   &&&   r   reshape_to_3d+FusionMultiHeadAttentionMMDit.reshape_to_3dk   s     3::--m<#..rxx
'/RYfgHJJ&&x1E1EF$$. M,,Y7	
	 	  +7;7K7K$$Y^^4""r   c                4   < V ^8  d   QhRS[ RS[R,          /# r   mul_qr$   Nr   rD   )r   r   s   "r   r   r      s&     .H .HY .H`cfj`j .Hr   c                   V P                   P                  VRR.^ ^ .4      pVf   R# Vw  rE\        P                  ! VR. RO4      '       g   R# VP                  ^ ,          VP                  ^ &   VP
                  ^ ,          pVR,           VP
                  ^ &   V P                  VP
                  ^ ,          VR,           4      # )a  
MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

Before:
                       MatMul
                         |
                       Add      Concat
                         |      /
                         Reshape
                          |
                       Transpose(perm=0,2,1,3)
                          |
               SimplifiedLayerNorm
                          |
                         Mul

After:
                       MatMul
                         |
                        Add      Concat
                         |      /
                         Reshape
                           |
                   SimplifiedLayerNorm
                           |
                Reshape (shape=[0, 0, -1])
r=   r)   Nperm_BSNH_BSDr   r         )r   r-   r   check_node_attributer/   rX   r\   )r   r`   r(   pathsln_atranspose_a
sln_outputs   &&&    r   'adjust_query_from_bnsh_to_bsd_no_concatEFusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd_no_concat   s    : zz+++[9F

 <!//V\RR %**1-A\\!_
$w.Q!!%,,q/:3FGGr   c                4   < V ^8  d   QhRS[ RS[R,          /# r_   ra   )r   r   s   "r   r   r      s*     MX MX9 MXVY\`V` MXr   c                   V P                   P                  V. R
O. RO4      pVf   R# Vw  rEp\        VP                  4      ^8w  d   R# V P                   P                  VRR.^^ .4      pVf   R# Vw  rx\        P
                  ! VR. RO4      '       g   R# \        P
                  ! VR. RO4      '       g   R# \        P
                  ! VR^4      '       g   R# VP                  ^ ,          VP                  ^ &   VP                  ^ ,          VP                  ^ &   \        P                  ! RVP                  ^ ,          VP                  ^ ,          .VP                  ^ ,          R,           .V P                   P                  R4      ^R7      p	V P                  P                  V	4       V P                  V P                  V	P                  &   V P                  V	P                  ^ ,          VP                  ^ ,          R	,           4      # )a  
MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

    Before:
              MatMul      MatMul
                |            |
                Add Concat  Add    Concat
                 |    /      |      /
                 Reshape     Reshape
                    |           |
Transpose(perm=0,2,1,3)      Transpose(perm=0,2,1,3)
                    |           |
    SimplifiedLayerNorm  SimplifiedLayerNorm
                    |     /
                    Concat(axis=2)
                     |
                    Mul

    After:
           MatMul        MatMul
             |              |
            Add Concat     Add     Concat
             |    /         |     /
             Reshape       Reshape
                |            |
   SimplifiedLayerNorm  SimplifiedLayerNorm
                |       /
              Concat(axis=1)
                 |
              Reshape (shape=[0, 0, -1])
r+   r=   r)   Nrc   axisrd   rK   rL   rI   rr   re   )r+   r=   r)   )r   r   r   rf   )r   r-   r.   r/   r   ri   r   rS   rX   rT   rU   rV   rR   rW   rI   r\   )
r   r`   r(   rj   concatrk   rl   sln_btranspose_bnew_concat_nodes
   &&&       r   adjust_query_from_bnsh_to_bsd;FusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd   s   B zz++C

 <%)"{v||!zz+++[9F

 <!//V\RR//V\RR//BB %**1-A$**1-A **LLOU\\!_5]]1%/0,,X6
 	  1=A=Q=Q$$_%9%9:!!/"8"8";V]]1=MPV=VWWr   c                &   < V ^8  d   QhRS[ RS[/# )r   	unsqueezer$   ra   )r   r   s   "r   r   r     s     "( "(i "(C "(r   c                   V P                   P                  VP                  4      pVEf   \        VP                  4      ^8X  dZ   \
        P                  ! RVP                  VP                  ^ ,          R,           .V P                  P                  R4      ^.R7      pMRpV P                  P                  V4      fP   \
        P                  ! V\        P                  ^.^.R7      pV P                  P                  WPP                  4       \
        P                  ! RVP                  ^ ,          V.VP                  ^ ,          R,           .V P                  P                  R4      R7      pV P                   P#                  V4       V P                  V P$                  VP                  &   VP                  ^ ,          pW P                   VP                  &   V# )N	Unsqueezerd   )rK   rL   rI   axesunsqueeze_axes_2)rI   	data_typedimsvalsrJ   )r   getrI   r.   r/   r   rS   rX   r   rT   rM   make_tensorr   INT64rQ   rR   rU   rV   rW   )r   r{   updated_unsqueeze_outputnew_nodeinitializer_namer   s   &&    r   update_unsqueeze_axes_1_to_2:FusionMultiHeadAttentionMMDit.update_unsqueeze_axes_1_to_2  s   #'#<#<#@#@#P #+9??#q(!++$??&--a07:;44[A $6 ::--.>?G'-'9'9-"-"3"3SS	($ JJ../?AUAUV!++%OOA.0@A&--a07:;44[A	 $$X.:>:N:ND((7'/q'9$8P%%inn5''r   c                B   < V ^8  d   QhRS[ RS[S[S[ 3,          RS[/# )r   addr(   r$   )r   dictrD   r;   )r   r   s   "r   r   r   (  s-     3 3 3cS\nI] 3bf 3r   c                   \        VP                  4      ^8w  d   R# V P                  P                  V. RO. ROV4      pVf   R# \	        V P                  4      pVP                  V^,          4      pVe	   V^.8w  d   R# VP                  V^,          4      pVe	   V^ .8w  d   R# V P                  P                  V. RO. ROV4      pVf   R# VP                  V^,          4      pVe	   V^.8w  d   R# VP                  V^,          4      pVe	   V^ .8w  d   R# V P                  V^,          4      V^ ,          P                  ^&   V P                  V^,          4      V^ ,          P                  ^&   R# )a.  
Update axes of Unsqueeze from [1] to [2] in the following pattern:
          Unsqueeze        Unsqueeze
          (axes=[0])       (axes=[0])
             |              |
          Unsqueeze        Unsqueeze
      ... (axes=[1])  ...  (axes=[1])
        |     /        |   /
           Mul         Mul
            |       /
             Add
Args:
    add (NodeProto): the Add node
    output_name_to_node (Dict[str, NodeProto]): mapping from output name to node

Returns:
    bool: True if the pattern is matched and updated successfully, False otherwise.
FT)Mulr}   r}   )rg   rg   r   )r   rg   r   )r.   r/   r   r-   r   get_squeeze_or_unsqueeze_axesr   )r   r   r(   nodes_bfusion_utilsaxes_1axes_0nodes_as   &&&     r   update_unsqueeze_axes3FusionMultiHeadAttentionMMDit.update_unsqueeze_axes(  sL   & syy>Q **..s4UW`buv?"4::.;;GAJG>Vs];;GAJG>Vs] **..s4UW`buv?;;GAJG>Vs];;GAJG>Vs]"??
K
"??
K
r   c                4   < V ^8  d   QhRS[ RS[R,          /# r_   ra   )r   r   s   "r   r   r   ]  s*     RI RI	 RI[^ae[e RIr   c                   V P                   P                  V. R
O. RO4      pVf   R# Vw  rErgp\        VP                  4      ^8w  d   R# V P                   P                  VRR.^^ .4      pVf   R# Vw  r\        P
                  ! VR. RO4      '       g   R# \        P
                  ! V
R. RO4      '       g   R# \        P
                  ! VR^4      '       g   R# V P                  WB4      '       g   R# VP                  ^ ,          VP                  ^ &   V
P                  ^ ,          V	P                  ^ &   \        P                  ! RVP                  ^ ,          V	P                  ^ ,          .VP                  ^ ,          R,           .V P                   P                  R4      ^R7      pV P                  P                  V4       V P                  V P                  VP                  &   V P                   P!                  VP                  ^ ,          VP                  ^ ,          4       V P#                  VP                  ^ ,          VP                  ^ ,          R	,           4      # )a[  
Adjust graph to change query format from BNSH to BSD for Flux model.
Note that the graph pattern is complex, and we only do a shallow match here.

Before:
               |               |
Transpose(perm=0,2,1,3)    Transpose(perm=0,2,1,3)
                |              |
SimplifiedLayerNorm  SimplifiedLayerNorm
                |             /
                Concat(axis=2)
                 |
                Mul     Mul
                 |    /
                  Add
                   |
                  Mul

After (Transpose nods are removed, and a Reshape is added):

                |           |
    SimplifiedLayerNorm  SimplifiedLayerNorm
                |         /
            Concat(axis=1)
                |
                Mul    Mul
                 |    /
                  Add
                   |
               Reshape (shape=[0, 0, -1])
r+   r=   r)   Nrc   rr   rd   rs   re   )Addr   r+   r=   r)   )r   r   r   r   r   rf   )r   r-   r.   r/   r   ri   r   r   rS   rX   rT   rU   rV   rR   rW   rI   replace_input_of_all_nodesr\   )r   r`   r(   rj   r   _mul_art   rk   rl   ru   rv   rw   s   &&&         r   "adjust_flux_query_from_bnsh_to_bsd@FusionMultiHeadAttentionMMDit.adjust_flux_query_from_bnsh_to_bsd]  s   B zz++Q

 <26/VKv||!zz+++[9F

 <!//V\RR//V\RR//BB ))#CC %**1-A$**1-A **LLOU\\!_5]]1%/0,,X6
 	  1=A=Q=Q$$_%9%9:

--fmmA.>@V@VWX@YZ!!#**Q-A1GHHr   c                4   < V ^8  d   QhRS[ RS[R,          /# r_   ra   )r   r   s   "r   r   r     s&     1I 1Iy 1Ibehlbl 1Ir   c                   V P                   P                  V. RO. RO4      pVf   R# Vw  rErg\        P                  ! VR. RO4      '       g   R# V P	                  WB4      '       g   R# VP
                  ^ ,          VP
                  ^ &   VP                  ^ ,          R,           VP                  ^ &   V P                  VP                  ^ ,          VP                  ^ ,          R,           4      # )ax  
Adjust graph to change query format from BNSH to BSD for Flux model.
Note that the graph pattern is complex, and we only do a shallow match here.

Before:
              |
            Transpose(perm=0,2,1,3)
              |
            SimplifiedLayerNorm
              |
             Mul     Mul
               |   /
               Add
                |
               Mul

After (Transpose is removed, and a Reshape is added):

                |
              SimplifiedLayerNorm
                |
                Mul   Mul
                 |   /
                 Add
                  |
               Reshape (shape=[0, 0, -1])
Nrc   rd   re   )r   r   r=   r)   )r   r   r   r   rf   )r   r-   r   ri   r   r/   rX   r\   )r   r`   r(   rj   r   r   rk   rl   s   &&&     r   )adjust_flux_single_query_from_bnsh_to_bsdGFusionMultiHeadAttentionMMDit.adjust_flux_single_query_from_bnsh_to_bsd  s    : zz++G

 <*.'U//V\RR ))#CC %**1-A

1/

1!!#**Q-A1GHHr   c                4   < V ^8  d   QhRS[ RS[ R,          /# )r   qr$   NrC   )r   r   s   "r   r   r     s      ; ;s ;CRVJ ;r   c           
     @   \         P                  ! R V.VR,           .V P                  P                  R RR7      . ROR7      pV P                  P                  V4       V P                  V P                  VP                  &   V P                  VR,           VR,           4      # )r)   rd   Transpose_BNSH_to_BSNH)name_prefix)rI   rc   re   rf   )
r   rS   r   rT   rU   rV   rR   rW   rI   r\   )r   r   r(   transpose_qs   &&& r   transpose_reshape_bnsh_to_bsd;FusionMultiHeadAttentionMMDit.transpose_reshape_bnsh_to_bsd  s    &&C[M,,[F^,_
 	  -9=9M9M$$[%5%56!!!g+q6z::r   c                >   < V ^8  d   QhRS[ RS[ RS[ RS[ RS[RS[/# )r   r   kvrX   	num_headsr$   )rD   r%   r   )r   r   s   "r   r   r     sG     ) )) ) 	)
 ) ) 
)r   c                    V^ 8  g   Q hWV.pV.p\         P                  ! RVVV P                  P                  R4      R7      pRVn        VP
                  P                  \         P                  ! RV4      .4       V# )a.  
Create a MultiHeadAttention node.

Args:
    q (str): name of q
    k (str): name of k
    v (str): name of v
    output (str): output name of MHA
    num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

Returns:
    NodeProto: the node created.
r   rJ   zcom.microsoftr   )r   rS   r   rT   domain	attributeextendmake_attribute)	r   r   r   r   rX   r   
mha_inputsmha_outputsmha_nodes	   &&&&&&   r   create_multihead_attention_node=FusionMultiHeadAttentionMMDit.create_multihead_attention_node  s    , 1}} AY
 h## ,,-AB	
 *!!6#8#8i#P"QR r   c                    VP                   R 8X  g   Q hTpV P                  P                  VP                  ^ ,          4      '       d   R# V P                  P	                  V. RO. ROV4      pVf   R# Vw  rgp\
        P                  ! VR. RO4      '       g   R# V P                  P                  V. RO. RO4      p	V	f   R# V	w  rrp ppVP                  ^ ,          pVVP                  ^ ,          8w  d   R# V P                  P                  V
RR.^^ .4      pVf   R# Vw  ppVP                  ^ ,          p\
        P                  ! VR. RO4      '       g   R# V P                  P                  VRR.^^ .4      pVf   R# V^ ,          P                  ^ ,          VP                  ^ ,          8w  d   R# VP                  ^,          pV P                  P                  VR^VR7      pVe   V P                  P                  VR^ VR7      pVf   R# \
        P                  ! VR. RO4      '       g   R# V P                  P                  VR^VR7      pVf   R# \
        P                  ! VR. RO4      '       g   R# MGV P                  P                  VR^VR7      pVf   R# \
        P                  ! VR. RO4      '       g   R# V'       d   V P                  VV4      MV P                  Wc^R	7      pV^ 8X  d   V P                  VVVRJ4      pV^ 8:  d   R# Ve   V P                  W4      pMV P                  W4      pVf=   V P                  W4      pVf(   V P                  W4      pVf   V P!                  VV4      pV P#                  VVVVP                  ^ ,          VR
7      pV P$                  P'                  V4       V P(                  V P*                  VP,                  &   V P.                  P1                  WgV.4       RV n        R# )r   Nr)   rc   r   SqrtDivr+   )r2   r(   )r2   )r   r   r   rX   r   T)MatMulr)   r*   )r   r   r   r   rf   )r   r   r   r   r   CastSliceShape)r   r   rg   r   rg   r   r   r   )r   rg   rh   r   )op_typer   find_graph_outputrX   match_child_pathr   ri   r-   r/   match_parentr6   r>   rx   rn   r   r   r   r   rU   rV   rR   rW   rI   nodes_to_remover   prune_graph)r   nodeinput_name_to_nodesr(   softmaxr3   
matmul_s_vtranspose_outreshape_outq_nodes	matmul_qkr`   sqrt_q_2div_qsqrt_q_shape_qq_bnshk_nodesmul_kr9   r   k_scale_nodesr   concat_vtranspose_1transpose_2r   queryr   s   &&&&                          r   fuse"FusionMultiHeadAttentionMMDit.fuse  s   ||y((( ::''q(9::

++79QSf
 =16.
;//v|TT**..N$
 ?CJ@	(61aQW]]1%%**..y5+:NQRTUPVW?${a //V\RR

44UVUOaQRVT !!!$q(99Q ::**:xQdw*x **11+1J] 2 K "33KVV**11+1J] 2 K "33KVV W
 **11KQL_ 2 K "33KVV
  x)<=##JQR#S 	 >11+?RT\dhThiIA~ 66uRE@@\E=;;EWE}FFub= !>>vGZ[E77%%a( 8 
 	  *6:6J6J$$X]]3##Z$LM  r   )r   r   )r   )__name__
__module____qualname____firstlineno____doc__r   r6   r>   r\   rn   rx   r   r   r   r   r   r   r   __static_attributes____classdictcell____classcell__)r    r   s   @@r   r   r      s     ' ' B/ /b# #4.H .H`MX MX^"( "(H3 3jRI RIh1I 1If; ;) )V   r   r   )loggingr   numpyrO   fusion_baser   r   r   onnxr   r   r   r	   
onnx_modelr   r   loggerr    r   r   <module>r      s4   
    $ = =  	8	K
 F K
 r   