+
    9i@#                        ^ RI t ^ RIt^ RIt^ RIt^ RIHt ^ RIt^ RI	H
t
HtHtHt ^ RIHt ^RIHt ^RIHt ]P(                  ! ]4      t ! R R4      tR t]R	8X  d   ]! 4       t]P4                  '       d   ]P7                  ]P8                  4       ]P:                  t]P>                  t ]PB                  PE                  ] 4      '       d"   ]PG                  R
]  R24       ]$! R
]  R24      h]PJ                  ! ]4      t&]! ]&]PN                  ]PP                  ]PR                  R7      t*]*PW                  4        ]*PL                  PY                  ] R4       R# R# )    N)
GraphProto
ModelProto	NodeProtoTensorProto)quantize_matmul_bnb4)	ONNXModel)attribute_to_kwargc                      a  ] tR t^t o Rt^ t^tRV 3R lR llt]V 3R lR l4       t	V 3R lR lt
V 3R	 lR
 ltV 3R lR ltR tRtV tR# )MatMulBnb4QuantizerzMPerform 4b quantization of constant MatMul weights using FP4 or NF4 data typeNc                ,   < V ^8  d   QhRS[ RS[RS[/# )   model
quant_type
block_size)r   int)format__classdict__s   "l/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/quantization/matmul_bnb4_quantizer.py__annotate__ MatMulBnb4Quantizer.__annotate__%   s"     6 6j 6c 6s 6    c                    T;'       g    . pV\         P                  \         P                  39   g   Q h\        V4      V n        W n        W0n        \        V4      V n        R # N)	r   FP4NF4r   r   r   r   setnodes_to_exclude)selfr   r   r   r   s   &&&&&r   __init__MatMulBnb4Quantizer.__init__%   sU    +11r1557J7N7NOOOOu%
$$ #$4 5r   c                L   < V ^8  d   QhRS[ S[,          RS[S[S[3,          /# )r   
graph_pathreturn)listr   tupler   )r   r   s   "r   r   r   .   s+      D,< {T^G^A_ r   c                    \        \        V4      ^,
          RR4       F6  pW,          pVP                   F  pVP                  V 8X  g   K  WC3u u # 	  K8  	  R# )   )NN)rangeleninitializername)r,   r"   gidgraphtensors   &&   r   __get_initializer%MatMulBnb4Quantizer.__get_initializer-   sQ    Z1,b"5COE++;;$&!=( , 6
 r   c                N   < V ^8  d   QhRS[ P                  RS[P                  /# )r   fpweightr#   )npt	ArrayLikenpndarray)r   r   s   "r   r   r   6   s#          2::  r   c           	        \        VP                  4      ^8w  d   \        R4      hVP                  4       P	                  4       pVP                  w  r4W4,          pV P
                  pWV,           ^,
          V,          pV^,           ^,          p\        P                  ! VRR7      p	\        P                  ! WqP                  R7      p
\        WWV P                  WC4       W3# )z4b quantize fp32/fp16 weightz9Current bnb4 block quantization only supports 2D tensors!uint8)dtype)r*   shape
ValueError	transposecopyr   r6   zerosr:   r   r   )r   r3   
fpweight_trowscolsnumelr   
num_blocksquantized_numelpackedabsmaxs   &&         r   bnb4_block_quant$MatMulBnb4Quantizer.bnb4_block_quant6   s     x~~!#XYY '')..0
^^
__
(1,;
 19*/9*NN;VT__VZar   c                <   < V ^8  d   QhRS[ RS[S[,          RS[ /# )r   nodegraph_stackr#   )r   r$   r   )r   r   s   "r   r   r   L   s(     5  5 Y 5 T*EU 5 Zc 5 r   c                   VP                   R8w  d   V# \        P                  RVP                   R24       VP                  V P                  9   d&   \        P                  RVP                   R24       V# VP
                  ^,          p\        P                  W24      w  rEVf   \        P                  R4       V# \        P                  P                  V4      p\        VP                  4      ^8w  d   \        P                  R4       V# V P                  V4      w  rx\        P                  P                  V4      p	VP                  R,           V	n        VP
                   F1  p
V
P                  V8X  g   K  VP
                  P                  V
4        M	  \        P                  P                  V4      pVP                  R	,           Vn        VP                   P#                  W.4       / pVP                  w  rWR
&   WR&   V P$                  VR&   V P&                  VR&   \        P(                  P*                  ! RRVP
                  ^ ,          V	P                  VP                  .RVP,                  ^ ,          .RVP                  '       d   VP                  R,           MRRR/VB p\        P                  RVP                   R24       V# )zdIf the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new nodeMatMulzstart to quantize z ...zexclude to quantize z$ as specified by nodes_to_exclude...z2MatMul doesn't have const weight. Skip to quantizez)MatMul weight is not 2D. Skip to quantize_Bnb4_absmaxKNr   r   inputsoutputsr,    domaincom.microsoftzcomplete quantization of )
MatMulBnb4)op_typeloggerdebugr,   r   inputr   %_MatMulBnb4Quantizer__get_initializeronnxnumpy_helperto_arrayr*   r;   rH   
from_arrayremover+   extendr   r   helper	make_nodeoutput)r   rK   rL   inputBBBs_graphB_arrayrF   rG   B_quantr\   absmax_tensorkwargsrA   rB   matmul_bnb4_nodes   &&&             r   _bnb4_matmul_node_weight,MatMulBnb4Quantizer._bnb4_matmul_node_weightL   sO    <<8#K)$))D9:99---LL/		{:^_`KA);;FP9LLMNK##,,Q/w}}"LLDEK..w7##..v6vv'^^EzzV#%%e, $
 ))44V<VVi/##W$<=]]
ss#|#|;;00 
JJqM7<<1C1CD
 [[^$
 )-			W$r	

 #
 
 	04@Ar   c                0   < V ^8  d   QhRS[ S[,          /# )r   rL   )r$   r   )r   r   s   "r   r   r      s     $ $T*-= $r   c                   . pVR,          pVP                    EF  pVP                   Uu. uFY  pVP                  \        P                  P
                  8X  g,   VP                  \        P                  P                  8X  g   KW  VNK[  	  ppV'       Edb   / pVP                   EF  pVP                  \        P                  P
                  8X  d:   VP                  VP                  4       VP                  V P                  V4      /pMVP                  \        P                  P                  8X  dW   . p	VP                   F5  p
VP                  V
4       V	P                  V P                  V4      .4       K7  	  VP                  V	/pM\        V4      pVP                  V4       EK  	  \        P                  P                   ! VP"                  VP$                  VP&                  3RVP                  /VB pVP                  V P)                  WA4      4       EK   	  VP+                  R4       VP                   P                  V4       VP-                  4        V# u upi )r'   r,   rK   r(   )rK   	attributetyper^   AttributeProtoGRAPHGRAPHSappendgr,   _process_subgraphgraphsrc   r	   updaterd   re   rY   r\   rf   ro   
ClearFieldpop)r   rL   	new_nodesr.   rK   attrgraph_attrsrm   kvvaluesubgraphs   &&         r   rz   %MatMulBnb4Quantizer._process_subgraph   s   	BJJD !NN*D99 3 3 9 99TYY$J]J]JdJd=d *  
 { NNDyyD$7$7$=$==#**4662"ii)?)?)LMd&9&9&@&@@ "(,H'..x8!LL$*@*@*M)NO )4 #ii//5MM"% + {{,,LL$**dkk@D		MS T::4MN7 : 	 

)$?s   AIIc                v   V P                   P                  4       .pV P                   P                  4       pR pV F  pVP                  R8X  g   K  RpK  	  V'       g1   VP	                  \
        P                  P                  R^4      .4       V P                  V4       V P                   P                  4        R# )FrW   TN)
r   r.   opset_importrV   rc   r^   rd   make_opsetidrz   clean_initializers)r   rL   r   has_ms_domainopsets   &    r   processMatMulBnb4Quantizer.process   s    zz'')*zz..0!E||. $ " !9!9/1!M NO{+

%%'r   )r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r   r   r   staticmethodr]   rH   ro   rz   r   __static_attributes____classdictcell__)r   s   @r   r   r      s^     W C C6 6     ,5  5 n$ $L( (r   r   c            	         \         P                  ! R R7      p V P                  RRRR7       V P                  RRRR7       V P                  RR	^\        P                  \        P
                  .R
R7       V P                  RR	^@RR7       V P                  RRR	RR7       V P                  R	R7       V P                  RR\        R	. RR7       V P                  4       # )a  Blockwise FP4/NF4 quantization for MatMul 2D weight matrices.

A weight matrix is partitioned into blocks, where each block is a contiguous
subset inside the flattened transposed weight matrix. Each block is quantized
into a set of 4b integers with an absolute value scaling factor.
)descriptionz--input_modelTzPath to the input model file)requiredhelpz--output_modelzPath to the output model filez--quant_typeFz&Quantization data type. 0: FP4, 1: NF4)r   defaultchoicesr   z--block_sizezVBlock size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64)r   r   r   z-vz	--verbose
store_true)r   action)verbosez--nodes_to_exclude+zBSpecify the nodes to be excluded from quantization with node names)nargsrt   r   r   r   )	argparseArgumentParseradd_argumentr   r   r   set_defaultsstr
parse_args)parsers    r   r   r      s    $$F $=[\
(4>]^
$((*=*A*AB5   e	   kE,O
&
Q   r   __main__zfile z already exists)r   T)-r   loggingosnumpyr6   numpy.typingtypingr4   r^   onnx.onnx_pbr   r   r   r   onnxruntime.capi._pybind_stater   
onnx_modelr   quant_utilsr	   	getLoggerr   rZ   r   r   argsr   setLevelDEBUGinput_modelinput_model_pathoutput_modeloutput_model_pathpathexistserror	Exceptionloadr   r   r   r   quantr   save_model_to_file r   r   <module>r      s     	    G G ? ! +			8	$^( ^(B$N z<D|||&''))	ww~~'((u./?@% 12/BCCII&'EtZ^ZoZopE	MMO	KK""#4d; r   