+
    9iR                        ^ RI Ht ^ RIt^ RIHt ^ RIHt ^ RIH	t	H
t
 ^ RIHtHt ^ RIHt ^ RIHtHtHtHtHt ^ R	IHt ]! ]4      t ! R
 R4      t ! R R4      t ! R R4      t ! R R4      t ! R R4      t ! R R4      t ! R R4      t  ! R R4      t! ! R R]4      t" ! R R]4      t# ! R R]"4      t$ ! R  R!]"4      t% ! R" R#]"4      t& ! R$ R%]"4      t' ! R& R']4      t(R# )(    )	getLoggerN)DynamoOnnxHelper)Fusion)AttentionOpTypeFusionOptions) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)NumpyHelper)
ModelProto	NodeProtoTensorProtohelpernumpy_helper	OnnxModelc                   &   a  ] tR t^t o R tRtV tR# )ProcessGemmWFuncc                0    \         P                  ! VR4      # )   r   r   )np	transposeselfxs   &&e/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/transformers/onnx_model_phi.py__call__ProcessGemmWFunc.__call__   s    ||Av&&     N__name__
__module____qualname____firstlineno__r   __static_attributes____classdictcell____classdict__s   @r   r   r      s     ' 'r   r   c                   &   a  ] tR t^t o R tRtV tR# )ProcessMatMulQFuncc                j    \         P                  ! \         P                  ! V^^ 4      ^ ,          R4      #    r   r   r   splitr   s   &&r   r   ProcessMatMulQFunc.__call__   %    ||BHHQ1-a0&99r   r    Nr!   r(   s   @r   r+   r+           : :r   r+   c                   &   a  ] tR t^t o R tRtV tR# )ProcessMatMulKFuncc                j    \         P                  ! \         P                  ! V^^ 4      ^,          R4      # r-   r/   r   s   &&r   r   ProcessMatMulKFunc.__call__   r2   r   r    Nr!   r(   s   @r   r5   r5      r3   r   r5   c                   &   a  ] tR t^#t o R tRtV tR# )ProcessMatMulVFuncc                j    \         P                  ! \         P                  ! V^^ 4      ^,          R4      # r-   r/   r   s   &&r   r   ProcessMatMulVFunc.__call__$   r2   r   r    Nr!   r(   s   @r   r9   r9   #   r3   r   r9   c                   &   a  ] tR t^(t o R tRtV tR# )ProcessBiasQFuncc                D    \         P                  ! V^R4      ^ ,          pV# r.   r   r0   r   s   &&r   r   ProcessBiasQFunc.__call__)       HHQ2q!r   r    Nr!   r(   s   @r   r=   r=   (         r   r=   c                   &   a  ] tR t^.t o R tRtV tR# )ProcessBiasKFuncc                D    \         P                  ! V^R4      ^,          pV# r?   rA   r   s   &&r   r   ProcessBiasKFunc.__call__/   rC   r   r    Nr!   r(   s   @r   rF   rF   .   rD   r   rF   c                   &   a  ] tR t^4t o R tRtV tR# )ProcessBiasVFuncc                D    \         P                  ! V^R4      ^,          pV# r?   rA   r   s   &&r   r   ProcessBiasVFunc.__call__5   rC   r   r    Nr!   r(   s   @r   rJ   rJ   4   rD   r   rJ   c                   &   a  ] tR t^:t o R tRtV tR# )ProcessRotCacheFuncc                    \        VP                  4      ^8X  g   Q hVP                  ^,          ^ 8X  d
   VR,          # V# )   ):NNN:r      N)lenshaper   s   &&r   r   ProcessRotCacheFunc.__call__;   s7    177|q   771:W:r   r    Nr!   r(   s   @r   rN   rN   :   s      r   rN   c                   \  a a ] tR t^Dt oV3R lV 3R lltV3R lR ltR tR tR tR t	R"R	 lt
R
 tR tR tV3R lR ltR#V3R lR lltR#V3R lR lltR$V3R lR lltR#V3R lR lltR#V3R lR lltR%V3R lR lltR%V3R lR lltR%V3R lR lltR&V3R lR  lltR!tVtV ;t# )'Fissionc                6   < V ^8  d   QhRS[ RS[S[,          /# )rP   modelnodes_to_find)r   liststr)formatr)   s   "r   __annotate__Fission.__annotate__E   s#     ; ;; Cy;r   c                *   < \         SV `  VR V4       R# )DONOTUSENsuper__init__)r   rX   rY   	__class__s   &&&r   rc   Fission.__init__E   s    
 	
M:r   c                    < V ^8  d   QhRS[ /# rP   attn_op_typer   )r\   r)   s   "r   r]   r^   L   s     ) )/ )r   c                    Wn         R # Nrh   )r   rh   s   &&r   set_attention_op_typeFission.set_attention_op_typeL   s    (r   c                4    VR ,           \        V4      ,           # )_r[   )r   layer_idnames   &&&r   	get_unameFission.get_unameO   s    czCM))r   c                    V F;  pW28X  g1   VP                  V4      '       g   VP                  V4      '       g   K9  Vu # 	  \        R V R24      h)zEdge z
 not found)endswith
startswith
ValueError)r   edgesrs   edges   &&& r   get_edge_by_nameFission.get_edge_by_nameR   sH    D|t}}T22dood6K6K  5j122r   c                :    V P                  VP                  V4      # rk   )r|   inputr   noders   s   &&&r   get_input_by_nameFission.get_input_by_nameX   s    $$TZZ66r   c                :    V P                  VP                  V4      # rk   )r|   outputr   s   &&&r   get_output_by_nameFission.get_output_by_name[   s    $$T[[$77r   c                   V P                   P                  V4      p\        P                  ! V4      pV! V4      p\        P
                  ! Vf
   VR,           MT\        P                  VP                  VP                  4       P                  4       RR7      pV P                   P                  WpP                  4       VP                  # )N
_processedT	data_typedimsvalsraw)rX   get_initializerr
   to_arrayr   make_tensorr   FLOATrS   flattentobytesadd_initializerthis_graph_namers   )r   initializer_namefunctorcustom_namei
i_np_arrayprocessed_i_np_array
new_tensors   &&&&    r   process_initializerFission.process_initializer^   s    JJ&&'78 ))!,
&z2''/:/B|+!''%++%--/779

 	

"":/C/CDr   c                    V P                   P                  4       P                  P                  4       pWn        \
        P                  VP                  P                  n	        R # rk   )
rX   graph
value_infoaddrs   r   r   typetensor_type	elem_typer   rs   new_value_infos   && r   add_fp32_value_infoFission.add_fp32_value_infol   C    ))+66::<"4?4E4E''1r   c                    V P                   P                  4       P                  P                  4       pWn        \
        P                  VP                  P                  n	        R # rk   )
rX   r   r   r   rs   r   INT64r   r   r   r   s   && r   add_int64_value_infoFission.add_int64_value_infoq   r   r   c                   V P                   P                  4       P                   FI  pVP                  V8X  g   K  V P                   P                  4       P                  P	                  V4        M	  \
        P                  ! V\        P                  VR 7      pV P                   P                  4       P                  P                  V.4       R# )r   rS   N)
rX   r   r   rs   remover   make_tensor_value_infor   r   extend)r   rs   rS   r   r   s   &&&  r   replace_fp32_value_infoFission.replace_fp32_value_infov   s    ****,77J$&

  "--44Z@ 8  66!''

 	

%%,,n-=>r   c                L   < V ^8  d   QhRS[ S[,          RS[RS[ S[,          /# )rP   subgraph_nodesrr   layer_known_edges_names)rZ   r   intr[   )r\   r)   s   "r   r]   r^      s4     O O"9oO9<OW[\_W`Or   c                   V EF@  p\        VP                  4       FV  w  rVVR 8X  d   K  Wc9  g   K  V P                  W&4      VP                  V&   V P                  VP                  V,          4       KX  	  \        VP                  4       FV  w  rVVR 8X  d   K  Wc9  g   K  V P                  W&4      VP                  V&   V P                  VP                  V,          4       KX  	  V P                  W$P
                  4      Vn        V P                  P                  V4       V P                  V P                  VP
                  &   EKC  	  R# ) N)
	enumerater   rt   r   r   rs   nodes_to_addappendr   node_name_to_graph_name)r   r   rr   r   new_noder   rs   s   &&&&   r   set_unique_name_and_add_nodes%Fission.set_unique_name_and_add_nodes   s     'H$X^^42:8(,x(FHNN1%,,X^^A->? 5 %X__52:8)-)GHOOA&,,X__Q-?@ 6 !NN8]]CHM$$X.:>:N:ND((7 'r   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# rP   inputsoutputsprefixrZ   r[   )r\   r)   s   "r   r]   r^      s*     
 
S	 
DI 
s 
r   c                    \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RVVVR,           RR7      pV.# )r.   LayerNormalization_LayerNormalizationg   >)r   r   rs   epsilonrR   r   	make_noder   r   r   r   r   s   &&&& r   	layernormFission.layernorm   sT    6{a7|q    //)
 vr   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# r   r   )r\   r)   s   "r   r]   r^      s*      49 tCy # r   c                $   \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RV^ ,          V^,          .VR,           .VR,           R7      p\        P                  ! RVR,           V^,          .VVR,           R7      pWE.# )r.   MatMul
matmul_outr   r   rs   AddBiasr   )r   r   r   r   matmulr   s   &&&&  r   gemmFission.gemm   s    6{a7|q   !!1Ivay)l*+("	
 \)6!95&	
 }r   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# r   r   )r\   r)   s   "r   r]   r^      s*      T#Y c C r   c           
         \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RVVVR,           RVVR7      pV.# )   RotaryEmbeddingcom.microsoft)r   r   rs   domainrotary_embedding_dim	num_headsr   )r   r   r   r   rot_dimr   r   s   &&&&&& r   rotaryFission.rotary   sZ    6{a7|q   ++"!(
 vr   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# r   r   )r\   r)   s   "r   r]   r^      s*     
 
tCy 
49 
c 
r   c                    \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RVVVR,           RR7      pV.# )r   FastGelur   )r   r   rs   r   r   r   s   &&&& r   fastgeluFission.fastgelu   sS    6{a7|q   *$"
 vr   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# r   r   )r\   r)   s   "r   r]   r^      s*     	 	$s) 	d3i 	 	r   c                    \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RVVVR,           R7      pV.# )rP   r   r   r   r   s   &&&& r   r   Fission.add   sO    6{a7|q   %	
 vr   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# r   r   )r\   r)   s   "r   r]   r^      *      $s) d3i  r   c           
         \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RVVVR,           RV^R7      pV.# )   MultiHeadAttentionr   )r   r   rs   r   r   unidirectionalr   r   r   r   r   r   r   s   &&&&& r   mhaFission.mha   sZ    6{a7|q    .."
 vr   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# r   r   )r\   r)   s   "r   r]   r^      r   r   c           
         \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RVVVR,           RVVR7      pV.# )   GroupQueryAttentionr   )r   r   rs   r   r   kv_num_headsr   r   s   &&&&& r   gqaFission.gqa   sZ    6{a7|q   !//""
 vr   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# r   r   )r\   r)   s   "r   r]   r^      s*      S	 DI s r   c                    \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RVVVR,           RV^^^ R7	      pV.# )   	Attentionr   )r   r   rs   r   r   r   	do_rotaryr   r   r   s   &&&&& r   	attentionFission.attention   s_    6{a7|q   +%"!#

 vr   c                L   < V ^8  d   QhRS[ S[,          RS[ S[,          RS[/# r   r   )r\   r)   s   "r   r]   r^     s1      S	 c 	r   c                    \        V4      ^8X  g   Q h\        V4      ^8X  g   Q h\        P                  ! RVVVR,           RVVVVR7	      pV.# )   PagedAttentionzvllm.ort.ext)r   r   rs   r   r   num_kv_heads	head_sizescaler   )r   r   r   r   r   r  r  r   s   &&&&&&& r   
paged_attnFission.paged_attn  sb     6{a7|q   **!"

 vr   rl   rk   )r   )r       r  )r   r  )r   r  P   g   %?)r"   r#   r$   r%   rc   rm   rt   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r&   r'   __classcell__rd   r)   s   @@r   rV   rV   D   s     ; ;) )*378F
F

?O O(
 
 " 
 
	 	      r   rV   c                   z   a a ] tR tRt oV3R lV 3R lltV3R lR ltR tV3R lR ltV3R	 lR
 ltRt	Vt
V ;t# )Phi2PreProcessori  c                ,   < V ^8  d   QhRS[ RS[RS[/# rP   rX   r   hidden_sizer   r   )r\   r)   s   "r   r]   Phi2PreProcessor.__annotate__  s"     9 9j 9S 9s 9r   c                Z   < \         SV `  V4       ^ V n        W n        W0n        RV n        R# )r  modeling_phi_PhiModel_model_1N)rb   rc   num_hidden_layersnum_attention_headsr  	func_namer   rX   r   r  rd   s   &&&&r   rc   Phi2PreProcessor.__init__  s+    !##, &8r   c                    < V ^8  d   QhRS[ /# )rP   return)dict)r\   r)   s   "r   r]   r  &  s      D r   c                   / pR VR&   RVR&   RVR&   RVR&   \        ^V P                  ^4       F1  pRV 2VR	V 2&   R
V 2VRV 2&   RV 2VRV R2&   RV 2VRV R2&   K3  	  V P                  P                  P                   Uu. uF  q3P
                  NK  	  ppRV9   d   RV9   d   RVR&   RVR&   V# RV9   d   RV9   g   Q hRVR&   RVR&   V# u upi )logits	lm_head_1	input_idsl_input_ids_
past_key_0
key_statespast_value_0value_states	past_key_key_states_past_value_value_states_present_key_model_layers__1present_value__1_1model_layers_0_1_1model_layers_0_1_2present_key_0present_value_0model_layers_0_1)ranger  rX   r   r   rs   )r   	edge_dictr   or   s   &    r   get_phi2_edge_dict#Phi2PreProcessor.get_phi2_edge_dict&  s<   	!)	+$/	.!".	,$2	.!q$00!4A-6qc?IA3'(/:1#->IaS)*1=aS/AIaS+,3A!1EIaS-.	 5 $(::#3#3#:#:;#:a66#:;7*/Cw/N.=I*+.?I*+
  &05IW5TTT,;I().?I*+ <s   Cc                    R pV P                   P                  P                   F;  pVP                  P	                  V4      pVR8w  g   K'  VP                  VR Vn        K=  	  R# ))modeling_phi_PhiDecoderLayer_model_layersNr@   )rX   r   r   op_typefind)r   phi2_transformer_layer_namer   indexs   &   r   simplify_phi2_op_type&Phi2PreProcessor.simplify_phi2_op_type<  sP    &Q#JJ$$))DLL%%&ABE{#||EF3 *r   c                    < V ^8  d   QhRS[ /# rg   ri   )r\   r)   s   "r   r]   r  C  s     {) {)_ {)r   c                j   V\         P                  8H  V n        V\         P                  8H  V n        V P
                  P                  p. pVP                   EF  pR VP                  9   Ed0   \        P                  ! VP                  V P                  '       g   \        P                  M\        P                  RR.R7      p\        P                  ! R\        P                  ^.R7      p\        P                  ! R\        P                  RR.R7      p\        P                  ! R\        P                  RR.R7      p\        P                  ! R\        P                  ^.R7      p	V P                  '       g   VP                  WVV.4      MVP                  WWV	.4       V P                  '       d   RVP                  9   d   \        P                  ! VP                  P                  RR	4      VP                   P"                  P$                  ^RV P&                  R
V P(                  V P&                  ,          .R7      p
VP                  V
.4       EK  EK  V P                  '       d   RVP                  9   dV   \        P                  ! VP                  VP                   P"                  P$                  . ROR7      p
VP                  V
.4       RVP                  9   dY   \        P                  ! VP                  VP                   P"                  P$                  . ROR7      p
VP                  V
.4       EK  EK  RVP                  9   g   RVP                  9   g   EK  \        P                  ! VP                  VP                   P"                  P$                  RV P&                  R
V P(                  V P&                  ,          .R7      p
VP                  V
.4       EK  	  VP+                  R4       VP                  P                  V4       . p\-        VP.                  4       EFe  w  rV^ 8X  d   VP                  V.4       K!  V P                  '       d   RVP                  9   d   \        P                  ! VP                  P                  RR4      VP                   P"                  P$                  ^RV P&                  RV P(                  V P&                  ,          .R7      p
VP                  V
.4       K  K  V P                  '       d   K  \        P                  ! VP                  VP                   P"                  P$                  RV P&                  RV P(                  V P&                  ,          .R7      p
VP                  V
.4       EKh  	  VP+                  R4       VP.                  P                  V4       R# )r(  
batch_sizeseq_lenr   stepposition_idsattention_maskinput_metadatapast_keypastpast_seq_len
past_valuer   present_keypresenttotal_seq_lenr   N)
num_blocksr   head_size_x
block_sizeblock_x)rX  r   r  rZ  )r   r  use_attnr
  use_vllmrX   r   r   rs   r   r   r   INT32r   r   replacer   r   r   r  r  
ClearFieldr   r   )r   rh   r   
new_inputsvivi_iidvi_stepvi_pidvi_maskvi_metavi_cachenew_outputsr   s   &&           r   process_graph_io!Phi2PreProcessor.process_graph_ioC  sy   $(A(AA$(F(FF

  
++Bbgg%66GG7;}}}k//+J[J['3
 !77)//#
  66")//'3
 !77$)//'3
 !77$)//#  === %%v&@A#**FG+DE}}}(%<<
F;"$''"5"5"?"?( 44* ,,0H0HH
 H %%xj1 ) (%<<"$''"5"5"?"?a H
 %%xj1277*%<<"$''"5"5"?"?	 H %%xj1 + (LBGG,C%<<"$''"5"5"?"?( 44* ,,0H0HH		 H %%xj1] ` 	!:&u||,EAAv""B4(===$/#)#@#@GGOOM9E&(gg&9&9&C&C ! , $ 8 8 / $ 0 0D4L4L L#
$ $**H:6 0 ]]]%<<"$''"5"5"?"?( 44+ ,,0H0HH		 H  &&z2? -B 	"K(r   c                    < V ^8  d   QhRS[ /# rg   ri   )r\   r)   s   "r   r]   r    s     , ,O ,r   c                   R pV P                   P                   F<  pVP                  P                  V P                  4      '       g   K0  VP                  p M	  Vf   Q hV P                  V4       V P                  V P                  4       4       V P                  4        V P                  4        V\        P                  8X  d   V P                  4        V P                  V4       R # rk   )rX   	functionsrs   rw   r  unroll_functionupdate_edgesr?  rG  remove_dropout_layerr   r
  remove_lm_head_layerrj  )r   rh   function_namefuncs   &&  r   preprocess_onnx Phi2PreProcessor.preprocess_onnx  s    JJ((Dyy!!$..11 $		 ) (((]+$1134""$!!#?999%%'l+r   )r  r  r  r  r\  r]  )r"   r#   r$   r%   rc   r?  rG  rj  ru  r&   r'   r  r  s   @@r   r  r    s:     9 9 ,4{) {)z, , ,r   r  c                   D   a a ] tR tRt oV3R lV 3R lltR tRtVtV ;t# )FissionTransformerEmbeddingPhii  c                    < V ^8  d   QhRS[ /# rP   rX   r   )r\   r)   s   "r   r]   +FissionTransformerEmbeddingPhi.__annotate__  s     \ \\r   c                *   < \         SV `  VR .4       R# )6torch_nn_modules_sparse_Embedding_model_embed_tokens_1Nra   r   rX   rd   s   &&r   rc   'FissionTransformerEmbeddingPhi.__init__  s     	!Y Z[r   c                   \         P                  R VP                  4       \        VP                  4      ^8X  g   Q h\        VP
                  4      ^8X  g   Q hVP                  ^ ,          pVP
                  ^ ,          pV P                  VR4      pWEV.p\        P                  ! RWd.V.RR7      .pV P                  V^ V4       V P                  P                  V4       RV n        R# )Optimizing %s...zembed_tokens.weightGatherEmbedding_Gatherr   TN)loggerinfors   rR   r   r   r   r   r   r   nodes_to_remover   prune_graph)	r   r   input_name_to_nodesoutput_name_to_noder   r   	embeddingr   r   s	   &&&&     r   fuse#FissionTransformerEmbeddingPhi.fuse  s    &		24::!###4;;1$$$

1Q**41FG	#()"< !)'	
 	**>1>UV##D)r   r  	r"   r#   r$   r%   rc   r  r&   r'   r  r  s   @@r   rx  rx    s     \ \   r   rx  c                   D   a a ] tR tRt oV3R lV 3R lltR tRtVtV ;t# )FissionTransformerLayerNormPhii  c                    < V ^8  d   QhRS[ /# rz  r   )r\   r)   s   "r   r]   +FissionTransformerLayerNormPhi.__annotate__  s     f ffr   c                *   < \         SV `  VR .4       R# )@torch_nn_modules_normalization_LayerNorm_model_final_layernorm_1Nra   r~  s   &&r   rc   'FissionTransformerLayerNormPhi.__init__  s     	!c der   c                Z   \         P                  R VP                  4       \        VP                  4      ^8X  g   Q h\        VP
                  4      ^8X  g   Q hVP                  ^ ,          pVP
                  ^ ,          pV P                  VR4      pV P                  VR4      pWEWg.p. p	V	P                  V P                  WFV.V.R4      4       V P                  V	^cV4       V P                  V. RO4       V P                  V. RO4       V P                  P                  V4       RV n        R# )r  zfinal_layernorm.weightzfinal_layernorm.biasFinalTNrK  rL  r  )r  r  rs   rR   r   r   r   r   r   r   r   r  r   r  )
r   r   r  r  r   r   	ln_weightln_biasr   r   s
   &&&&      r   r  #FissionTransformerLayerNormPhi.fuse  s   &		24::!###4;;1$$$

1Q**41IJ	((/EF#()"Ednne-H6(T[\]**>2?VW$$U,TU$$V-UV##D)r   r  r  r  s   @@r   r  r    s     f f   r   r  c                   D   a a ] tR tRt oV3R lV 3R lltR tRtVtV ;t# )!FissionTransformerCausalLMHeadPhii  c                    < V ^8  d   QhRS[ /# rz  r   )r\   r)   s   "r   r]   .FissionTransformerCausalLMHeadPhi.__annotate__  s     N NNr   c                *   < \         SV `  VR .4       R# )(torch_nn_modules_linear_Linear_lm_head_1Nra   r~  s   &&r   rc   *FissionTransformerCausalLMHeadPhi.__init__  s     	!K LMr   c                   \         P                  R VP                  4       \        VP                  4      ^8X  g   Q h\        VP
                  4      ^8X  g   Q hVP                  ^,          pVP
                  ^ ,          pV P                  V P                  VR4      \        4       4      pV P                  VR4      pWEWg.p. p	V	P                  V P                  WFV.V.R4      4       V P                  V	^cV4       V P                  V. RO4       V P                  V. RO4       V P                  P                  V4       RV n        R# )r  zlm_head.weightzlm_head.biasLMHead_TNr  )rK  rL  i   )r  r  rs   rR   r   r   r   r   r   r   r   r   r   r  r   r  )
r   r   r  r  r   r   	fc_weightfc_biasr   r   s
   &&&&      r   r  &FissionTransformerCausalLMHeadPhi.fuse  s   &		24::!###4;;1$$$

1Q,,T-C-CDJZ-[]m]op	((~>#()"Edii7(CfXyYZ**>2?VW$$U,TU$$V-MN##D)r   r  r  r  s   @@r   r  r    s     N N   r   r  c                   V   a a ] tR tRt oV3R lV 3R lltR tR tR tR tRt	Vt
V ;t# )	FissionTransformerBlockPhii4  c                &   < V ^8  d   QhRS[ RS[/# )rP   rX   r   )r   r   )r\   r)   s   "r   r]   'FissionTransformerBlockPhi.__annotate__5  s     / // /r   c                   < W n         ^ p/ V n        . p\        V4       F(  pRV R2pVP                  V4       WPP                  V&   K*  	  \        SV `  W4       R# )r  *modeling_phi_PhiDecoderLayer_model_layers_r4  N)r   func_to_layer_idr<  r   rb   rc   )r   rX   r   max_num_layersrY   layerr  rd   s   &&&    r   rc   #FissionTransformerBlockPhi.__init__5  sd    
 # ">*EDUG2NI  +/4!!), +
 	.r   c                <    V P                   VP                  ,          # rk   )r  rC  )r   r   s   &&r   get_layer_id'FissionTransformerBlockPhi.get_layer_idE  s    $$T\\22r   c                   \         P                  ! R R.R.R\        P                  R7      \         P                  ! RRR.R.RR	7      \         P                  ! R
RR.R.RR	7      \         P                  ! R R.R.R\        P                  R7      \         P                  ! RR.R.RR	7      \         P                  ! RRR.R.R^ R7      \         P                  ! R R.R.R\        P                  R7      .pV# )CastrO  
mask_int64Cast_gqa_aux_0)r   r   rs   to	ReduceSumonemask_row_sumsReduceSum_gqa_auxr   Subseqlens_k_int64Sub_gqa_aux	seqlens_kCast_gqa_aux_1Shape
mask_shapeShape_gqa_aux_0r  total_seq_len_int64Gather_gqa_aux_0)r   r   rs   axistotal_sequence_lengthCast_gqa_aux_2)r   r   r   r   r^  )r   gqa_aux_nodess   & r   get_gqa_aux_nodes,FissionTransformerBlockPhi.get_gqa_aux_nodesH  s   ()%%$$ $e,()(	 '/*+"	 )*$%$$ Wl^l^Zkl$e,./' -.01%$$G*
V r   c	                   V P                   P                  V4      p	V P                   P                  V4      p
V P                   P                  V4      p\        P                  ! \        P
                  ! V	4      R4      p\        P                  ! \        P
                  ! V
4      R4      p\        P                  ! \        P
                  ! V4      R4      p\        P                  ! WV3^R7      pV P                   P                  V4      pV P                   P                  V4      pV P                   P                  V4      p\        P
                  ! V4      p\        P
                  ! V4      p\        P
                  ! V4      p\        P                  ! VVV3^ R7      pVP                  ^ ,          p\        P                  ! V\        P                  VV^,          .VP                  4       P                  4       RR7      pV P                   P                  VV P                  4       \        P                  ! V\        P                  V^,          .VP                  4       P                  4       RR7      pV P                   P                  VV P                  4       V P!                  VP"                  4       V P!                  VP"                  4       Wx3# )r   )r  Tr   r   )rX   r   r   r   r
   r   stackrS   r   r   r   r   r   r   r   r   r   rs   )r   q_wk_wv_wq_bk_bv_bweight_name	bias_nameq_weightk_weightv_weightqwkwvw
qkv_weightq_biask_biasv_biasqbkbvbqkv_biasr  weightbiass   &&&&&&&&&                 r   pack_qkv_gemm(FissionTransformerBlockPhi.pack_qkv_gemmv  s   ::--c2::--c2::--c2\\+..x8&A\\+..x8&A\\+..x8&AXXrrl3
++C0++C0++C0!!&)!!&)!!&)88RRLq1 &&q)##!''{Q/##%--/
 	

""64+?+?@!!!''/"!!#++-
 	

""4)=)=>  -  +%%r   c                   \         P                  R VP                  4       \         P                  RV P                   24       V P	                  V4      pVP
                  ^ ,          pV P                  VR4      pV P                  VR4      pVP                  R;,          pV P                  VR4      p	V P                  VR4      p
V P                  VR4      pV P                  VR4      pR<w  rppppRRppRRppV P                  \        P                  8w  Ed
   V P                  V P                  VR	4      \        4       4      pV P                  V P                  VR
4      \        4       4      pV P                  V P                  VR4      \        4       4      pV P                  VR4      pV P                  VR4      pV P                  VR4      pV P                  V P                  VR4      \        4       4      pV P                  V P                  VR4      \        4       4      pMV P                  V P                  VR	4      V P                  VR
4      V P                  VR4      V P                  VR4      V P                  VR4      V P                  VR4      V P                  VR4      V P                  VR4      4      w  ppV P                  V P                  VR4      \        4       4      pV P                  VR4      pV P                  V P                  VR4      \        4       4      pV P                  V P                  VR4      \        4       4      pV P                  VR4      pV P                  VR4      p. pVP!                  WVV.4       VP!                  WV
.4       VP!                  W.4       V P                  \        P                  8w  d   VP!                  VVVVVVVV.4       MVP!                  VV.4       VP!                  VVVVVV.4       VP!                  . R=O4       . pVP!                  V P#                  W[V.R.4      4       VP!                  V P%                  R VV.R!.R"4      4       VP!                  V P%                  RVV.R#.R$4      4       VP!                  V P'                  R#.R%.4      4       VP!                  V P%                  R%VV.R&.R'4      4       VP!                  V P)                  R!R&.R(.R)4      4       VP!                  V P)                  VR(.V.R*4      4       V P                  \        P                  8w  Edp   VP!                  V P%                  RW.R+.R,4      4       VP!                  V P%                  RVV.R-.R.4      4       VP!                  V P%                  RVV.R/.R04      4       V P                  \        P*                  8X  d   RMRpVP!                  V P-                  R+VVV.R1.R,4      4       VP!                  V P-                  R-VVV.R2.R.4      4       V P                  \        P.                  8X  d-   VP!                  V P1                  R1R2R/R3RR3Wg.R W.4      4       EMV P                  \        P2                  8X  d   VP!                  V P5                  R1R2R/VVRR.R W.4      4       V^ 8X  d   V P7                  4       p V  FA  p!V P8                  P;                  V!4       V P<                  V P>                  V!P                  &   KC  	  V P@                  PC                  \D        PF                  ! \H        PJ                  ! ^.R4R57      R6R77      V P<                  4       MV P                  \        P*                  8X  d(   VP!                  V PM                  R1R2R/WgR.R .4      4       MER8V 2p"R9V 2p#VP!                  V"V#.4       VP!                  V PO                  RVVRV".R V#.4      4       V PQ                  VVV4       V PS                  V. R>O4       V PS                  V. R>O4       V PT                  P;                  V4       R:V n+        R# )?r  zAttentionOpType: rQ  rT  rU  present_valuezinput_layernorm.weightzinput_layernorm.biasNzself_attn.q_proj.weightzself_attn.k_proj.weightzself_attn.v_proj.weightzself_attn.q_proj.biaszself_attn.k_proj.biaszself_attn.v_proj.biaszrotary_emb.cos_cachedzrotary_emb.sin_cachedattn_qkv_weightattn_qkv_biaszself_attn.dense.weightzself_attn.dense.biaszmlp.fc1.weightzmlp.fc2.weightzmlp.fc1.biaszmlp.fc2.biasrO  rM  r  r  rP  rN  ln_outattn_outattn_add_outOutProj_fc1_outFC1_gelu_outfc2_outFC2_residual_1_out
Residual_1
Residual_2queryQ_keyK_valueV_	query_rotkey_rotr   int64)dtyper  )rs   past_present_Tr@   )NNNNNN)rO  rM  r  r  rP  rN  r  ),r  r  rs   rh   r  r   r   r   r   r   r  r   r   rN   r  rt   r   r   r   r   r   r
  r   r   r   r   r   r  r   r   r   r   rX   r   r   
from_arrayr   arrayr  r  r   r   r  r  )$r   r   r  r  rr   i_hidden_statesi_key_cachei_value_cacheo_hidden_stateso_key_cacheo_value_cacher  r  attn_q_weightattn_q_biasattn_k_weightattn_k_biasattn_v_weightattn_v_biasr  r  	cos_cache	sin_cacheattn_out_weightattn_out_biasmlp_fc1_weightmlp_fc2_weightmlp_fc1_biasmlp_fc2_biasr   r   pos_ids_namer  r   	past_namepresent_names$   &&&&                                r   r  FissionTransformerBlockPhi.fuse  s,    	&		2'(9(9':;<$$T***Q-,,T:>..t\B++b/--dMB//oF**41IJ	((/EF^
ZM;{ *.t#T9	 9 99 44&&t-FGIYI[M !44&&t-FGIYI[M !44&&t-FGIYI[M 007NOK007NOK007NOK00&&t-DEGZG\I 00&&t-DEGZG\I .2-?-?&&t-FG&&t-FG&&t-FG&&t-DE&&t-DE&&t-DEx):;x9	.*O] 22""4)ABDTDV
 ..t5KL11$2H2HO_2`brbtu11$2H2HO_2`brbtu--dNC--dNC"$&&m'TU&&m'TU&&	';< 9 99#**!!!	 $**O]+KL&&m^\>[gh	
 	 &&n	
 dnno'-RU]T^_`dii_m(TWeVfhrstdii><(PS\R]_efgdmmYK*FGdii^\(RU^T_aghidhh	'BEUDVXdefdhh9I'J_L]_klm 9 99!!$))X},RU\T]_c"de!!$))X}k,RUZT[]a"bc!!$))X}k,RU\T]_c"de-1->->/B`B`-`>flL!!$++wiQZ.[^i]jlp"qr!!$++ulIy.Y\e[fhl"mn  O$F$FF%%HH$i">NPRT_o#[@ ""o&I&II%%HH'%#')'3 $[@ q=$($:$:$<M$1))00:FJFZFZ44X]]C %2 JJ..$//!G0LSXY[_[o[o ""o&D&DD%%OO$i+Vfg#  z*I%hZ0L#**I|+DE!!?OQZ[^hjv]w 	**>8E\]$$_6^_$$_6^_##D)r   )r  r   r  )r"   r#   r$   r%   rc   r  r  r  r  r&   r'   r  r  s   @@r   r  r  4  s,     / / 3,\(&Tk  k r   r  c                   j   a a ] tR tRt oV3R lV 3R lltR	V3R lV 3R llltR tR
R ltRtVt	V ;t
# )PhiOnnxModeliN  c                ,   < V ^8  d   QhRS[ RS[RS[/# r  r  )r\   r)   s   "r   r]   PhiOnnxModel.__annotate__O  s'     R Rj RS Rs Rr   c                   < \         SV `  V4       \        V P                  W#4      V n        \        W4      V n        \        V 4      V n        \        V 4      V n
        \        V 4      V n        R # rk   )rb   rc   r  rX   phi2_preprocessorr  fission_transformer_blockr  fission_causal_lm_headr  fission_transformer_layernormrx  fission_transformer_embeddingr   s   &&&&r   rc   PhiOnnxModel.__init__O  sW    !1$**i!U)CD)T&&G&M#-KD-Q*-KD-Q*r   c                4   < V ^8  d   QhRS[ R,          RS[/# )rP   optionsNadd_dynamic_axes)r   bool)r\   r)   s   "r   r]   r#  W  s      # # 4 #t #r   c                ,  < Vf   Q hVP                   pV P                  P                  V4       V P                  P	                  V4       V P                  P                  4        V P                  P                  4        V P                  P                  4        V P                  P                  4        \        SV `)  4        \        V 4      V n        \        V 4      V n        V P                  P                  4        V P                  P                  4        R # rk   )attention_op_typer&  rm   r%  ru  applyr(  r'  r)  rb   r  r	   fuse_slnr   fuse_bias_sln)r   r,  r-  rh   rd   s   &&& r   optimizePhiOnnxModel.optimizeW  s    """00&&<<\J..|<&&,,.**002##))+**002 5T:=dC  "r   c                    / p. ROpV F!  pV P                  V4      p\        V4      W&   K#  	  \        P                  RV 24       V# )z(
Returns node count of fused operators.
zOptimized operators: )	r  r   r   r
  GeluBiasGelur   r   SkipLayerNormalization)get_nodes_by_op_typerR   r  r  )r   op_countopsopnodess   &    r   get_fused_operator_statistics*PhiOnnxModel.get_fused_operator_statisticsl  sR     

 B--b1Eu:HL  	+H:67r   c                  a Sf   V P                  4       oR V3R llpV! R4      V! R4      ,           V! R4      ,           V! R4      ,           pV! R4      V! R4      ,           V! R	4      ,           pV! R
4      V! R4      ,           pV^ 8  ;'       d    W48H  ;'       d    WS8  pV^ 8X  d   \        P                  R4       V^ 8X  d   \        P                  R4       V^ 8X  d   \        P                  R4       V# )z1
Returns True when the model is fully optimized.
c                $    V ^8  d   QhR\         /# )rP   op_namerq   )r\   s   "r   r]   5PhiOnnxModel.is_fully_optimized.<locals>.__annotate__  s     	4 	4c 	4r   c                 :   < SP                  V 4      ;'       g    ^ # )r   )get)rC  fused_op_counts   &r   r;  1PhiOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g.33!3r   r  r   r   r
  r7  r8  r   r   r9  zLayer Normalization not fusedzGelu (or FastGelu) not fusedz+Attention (or MultiHeadAttention) not fused)r?  r  debugwarning)r   rG  r;  r  gelu
layer_norm
is_perfects   &f     r   is_fully_optimizedPhiOnnxModel.is_fully_optimized  s     !!??AN	4 	4 [!+,-,-. '() 	 (:"66*9MM23h?W6XX
!mZZ)*;ZZ*BY
?LL8919LL78>NNHIr   )r'  r&  r)  r(  r3  r2  r%  )NFrk   )r"   r#   r$   r%   rc   r4  r?  rN  r&   r'   r  r  s   @@r   r!  r!  N  s*     R R# #*. r   r!  ))loggingr   numpyr   dynamo_onnx_helperr   fusion_baser   fusion_optionsr   r   fusion_skiplayernormr   r	   fusion_utilsr
   onnxr   r   r   r   r   
onnx_modelr   r"   r  r   r+   r5   r9   r=   rF   rJ   rN   rV   r  rx  r  r  r  r!  r    r   r   <module>rY     s      /  9 _ $ I I  	8	' '
: :
: :
: :
    Vf Vrp,' p,f W  D W  B   BW  W tS9 Sr   