+
    9iF                         ^ RI t ^ RIt^ RIt^ RIt^ RIHt ^RIHtH	t	 ^RI
Ht ^RIHt ^RIHtHtHtHtHtHtHtHtHtHtHtHtHtHtHtHtHt ^RI H!t!  ! R R	]4      t"R# )
    N)onnx_pb)BaseQuantizerQuantizationParams)
TensorData)	ONNXModel)TENSOR_NAME_QUANT_SUFFIXQuantizationModeQuantizedValueQuantizedValueType__producer____version__add_infer_metadataattribute_to_kwargcompute_scale_zpcompute_scale_zp_float8find_by_nameget_qmin_qmax_for_qTypeget_qrange_for_qType	ms_domainquantize_onnx_initializer&save_and_reload_model_with_shape_infertensor_proto_to_array)CreateOpQuantizerc                     a  ] tR t^&t o R"R ltR tR tR tR tR t	R t
R	 tR#R
 ltR tR tR tR tR$R ltR%R ltR tR"R ltV 3R lR ltV 3R lR ltR&R ltR tR#R ltR'R ltR(R ltR)R ltR*R ltR tR tR  t R!t!V t"R# )+ONNXQuantizerNc                r   \         P                  ! V VVVVVVV	V
VV4       V'       Eg   V P                  P                  4        \	        V P                  P                  4      pVP
                  P                   Uu/ uF  qP                  VbK  	  upV n        V P                  P                  VP
                  P                   Uu/ uF  qP                  VbK  	  up4       V P                  P                  VP
                  P                   Uu/ uF  qP                  VbK  	  up4       \        V4      V n        W@n        WPn        V P                  ^
8  V n        RV P"                  9   ;'       d    V P"                  R,          V n        . V n        RV n        / V n        V P*                  P                  VP
                  P                   Uu/ uF  qP                  ^bK  	  up4       V P*                  P                  VP
                  P                   Uu/ uF  qP                  ^bK  	  up4       V P                  P                  P
                  P,                   F<  pV P*                  P                  \.        P1                  VP                  ^4      4       K>  	  V P                  \2        9  d   \5        RV P                   24      hV P7                  4       V n        RV n        RV n        RV n        RV n         / V n!        V P                  PE                  4       V n#        R# u upi u upi u upi u upi u upi )	
   MatMulConstBOnly/zunsupported quantization mode fixed_quantization_range_uint8fixed_quantization_range_int8
fixed_zerofixed_zero_zpN)$r   __init__modelreplace_gemm_with_matmulr   graph
value_infonamevalue_infosupdateoutputinputr   modestaticopset_versionfuse_dynamic_quantextra_optionsq_matmul_const_b_only	new_nodesgraph_scopetensor_namesnodedictfromkeysr	   
ValueErrorcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_mapget_non_initializer_inputsgenerated_value_names)selfr%   per_channelreduce_ranger.   r/   weight_qTypeactivation_qTypetensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizer2   viotitr7   s   &&&&&&&&&&&&&    e/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/quantization/onnx_quantizer.pyr$   ONNXQuantizer.__init__'   s    	 	
 vJJ//1:4::;K;KLE6;kk6L6LM6L6LMD##5;;;M;M$N;MRWWb[;M$NO##5;;;L;L$M;LRWWb[;L$MN"5)DJ	"&"4"4r"9%74;M;M%M%x%xRVRdRdewRx"  u{{7I7I!J7I''1*7I!JK  u{{7H7H!I7H''1*7H!IJJJ$$**//D$$T]]4;;%BC 0 99,,=dii[IJJ#'#E#E#G  (H$&E#+"1 $&  &*ZZ%J%J%L"K  N$N$M "K!Is   ;L L%L*L/L4c                8   \         P                  P                  VRV P                  P                  P                  R7      p\        V4       \        VV P                  V P                  V P                  V P                  V P                  V P                  V P                  V P                  V P                  V P                   V P"                  4      pWn        V P&                   V R2Vn        VP)                  4        VP                  P                  P*                  # )z
generate submodel for the subgraph, so that we re-utilize current quantization implementation.
quantize the submodel
update subgraph and set it back to node
onnx-quantizer)producer_nameopset_importsr   )onnxhelper
make_modelr%   opset_importr   r   rE   rF   r.   r/   rG   rH   rI   rJ   rK   rL   r2   parentr5   quantize_modelr'   )rD   subgraph	graph_keywarped_modelsub_quantizers   &&&  rP   quantize_subgraphONNXQuantizer.quantize_subgraphp   s     {{--*****77 . 

 	<(%IIKK!!""!!%%
  $'+'7'7&81$E!$$&""((...    c                h   VP                    Uu. uFY  pVP                  \        P                  P                  8X  g,   VP                  \        P                  P
                  8X  g   KW  VNK[  	  pp\        V4      ^ 8X  d   V# VP                  '       d   VP                  M#VP                   R\        V P                  4       2p/ pVP                    EF  pVP                  \        P                  P                  8X  d9   VP                  V P                  VP                  V RVP                   24      /pMVP                  \        P                  P
                  8X  db   . pVP                   F@  pVP                  V P                  VV RVP                   R\        V4       24      .4       KB  	  VP                  V/pM\        V4      pVP                  V4       EK  	  \        P                   P"                  ! VP                  VP$                  VP&                  3RVP                  /VB # u upi )zd
Check subgraph, if any, quantize it and replace it.
return new_nodes added for quantizing subgraph
_node_count_:r)   )	attributetyperV   AttributeProtoGRAPHGRAPHSlenr)   op_typer4   r`   ggraphsextendr   r+   rW   	make_noder-   r,   )	rD   r7   attrgraph_attrs	node_namekwargskvvaluer\   s	   &&       rP   quantize_node_with_sub_graph*ONNXQuantizer.quantize_node_with_sub_graph   s    
&yyD//555dFYFYF`F`9` D& 	 

 {q K!%DII4<<.SQUQ_Q_M`La0b	NNDyyD//555ii!7!79+Qtyyk@Z![\d11888 $HLL 22 (#,+Qtyyk3u:, G !, ii''-MM"# #$ {{$$T\\4::t{{eQUQZQZe^dee7
s   AH/(H/c                    \         ;QJ d4    R V P                  P                  4        4       F  '       g   K   R# 	  R# ! R V P                  P                  4        4       4      # )zA
Detect if model already has QuantizeLinear or DequantizeLinear.
c              3   l   "   T F*  qP                   R 8H  ;'       g    VP                   R8H  x  K,  	  R# 5i)QuantizeLinearDequantizeLinearN)rl   ).0r7   s   & rP   	<genexpr>.ONNXQuantizer.has_QDQ_nodes.<locals>.<genexpr>   s2      
_qW[LL,,RR@R0RR_qs   44TF)anyr%   nodes)rD   s   &rP   has_QDQ_nodesONNXQuantizer.has_QDQ_nodes   s]     s 
_c_i_i_o_o_q
ss 	
s 	
s 
_c_i_i_o_o_q
 
 	
rb   c                    \        WP                  P                  4       4      e   R# V P                  e   V P                  P	                  V4      # R# )NTF)r   r%   initializerrZ   find_initializer_in_path)rD   initializer_names   &&rP   r   &ONNXQuantizer.find_initializer_in_path   sA    (***@*@*BCO;;";;778HIIrb   c                    V P                   P                  V4       V F1  pVP                   F  pV P                  P	                  V4       K   	  K3  	  R # N)r4   ro   r,   rC   add)rD   r   r7   output_names   &&  rP   add_new_nodesONNXQuantizer.add_new_nodes   s@    e$D#{{**..{;  + rb   c                   V P                  4       '       d   \        P                  ! R 4       V P                  P	                  4        F  pV P
                  '       d   V P                  V4      p\        V P                  4      p\        W4      pVP                  4        \        V\        V P                  4      4       FB  pV P                  V,          P                   F  pV P                  P                  V4       K   	  KD  	  K  	  V P                  4        V P                  P!                  4       P#                  R4       V P                  P!                  4       P$                  P'                  V P                  4       V P(                  fH   V P                  P+                  4       w  rg\        V4      ^ 8  d   \-        R\/        V4      ,           4      h\0        V P                  P                  n        \4        V P                  P                  n        V P                  P                  P8                   Uu. uF  qP:                  \<        8X  g   K  VNK  	  p	pV	'       gv   V P                   Uu. uF  qP:                  R8X  g   K  VNK  	  p
pV
'       dA   V P                  P                  P8                  P                  4       p^Vn        \<        Vn        V P                  P                  # u upi u upi )zPlease check if the model is already quantized. Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.r7   z0Invalid model with unknown initializers/tensors.zcom.microsoft) r   loggingwarningr%   r   enable_subgraph_quantizationrw   rk   r4   r   quantizeranger,   rC   r   _dequantize_outputsr'   
ClearFieldr7   ro   rZ   clean_initializersRuntimeErrorstrr   rT   r   producer_versionrY   domainr   version)rD   r7   number_of_existing_new_nodesop_quantizerir   _initializers_not_foundopsetms_opsetms_nodess   &          rP   r[   ONNXQuantizer.quantize_model   s!   OOn
 JJ$$&D00088>+.t~~+>(,T8L!!#7T^^9LM#'>>!#4#;#;K..22;? $< N ' 	  " 	

%%f-

&&t~~6 ;;(,

(E(E(G%A)*Q."#UX[\rXs#stt)5

&,7

)'+zz'7'7'D'Db'DeXaHaEE'Db)-Z;;/;YHZ

((5599; !(zz cZs   1K'K'*K,K,c                    R V P                   9   d=   \        P                  ! RVV P                   R ,          4       V P                   R ,          # \        RV: R24      h)DefaultTensorTypezDget_tensor_type returns DefaultTensorType for tensor name %r, use %dz)Unable to find data type for weight_name=a7  . shape_inference failed to return a type probably this node is from a different domain or using an input produced by such an operator. This may happen if you quantize a model already quantized. You may use extra_options `DefaultTensorType` to indicate the default weight type, usually `onnx.TensorProto.FLOAT`.)r2   r   infor   rD   tensor_names   &&rP   _get_default_tensor_type&ONNXQuantizer._get_default_tensor_type   sf    $"4"44LLV""#67
 %%&9::7 GI J
 	
rb   c                $   \        WP                  P                  4       4      pVe   VP                  # WP                  9   d   V P                  V,          pVP
                  P                  R4      '       d_   V'       d7   VP
                  P                  P                  ^ 8X  d   V P                  V4      # VP
                  P                  P                  # V P                  '       d   V P                  f   V'       d   V P                  V4      # R # V P                  P                  V4      pVe   V# V P                  '       d4   V P                  '       d"   V P                  P                  V4      pVe   V# V'       d   V P                  V4      # R # )Ntensor_type)r   r%   r   	data_typer*   rg   HasFieldr   	elem_typer   r   rZ   is_valid_quantize_weightget_tensor_type)rD   r   	mandatoryweightrM   otyperess   &&&    rP   r   ONNXQuantizer.get_tensor_type  s1   k::+A+A+CD###***!!+.Bww..!4!4!>!>!!C88EEww**444111t{{7J44[AA44[AL,,,++--k:C
00==rb   c                   V P                  V4      '       d   V P                  V4      # WP                  9   d   V P                  V,          pVP                  P	                  R 4      '       dZ   VP                  P
                  P                  \        P                  P                  \        P                  P                  39   d   R# \        P                  ! RV: RVP                   R24       R# V P                  '       d.   V P                  '       d   V P                  P                  V4      # \        P                  ! RV: R24       R# )r   Tz<Inference failed or unsupported type to quantize for tensor z
, type is .Fz%Failed to infer data type of tensor: zS. Please add data type info for this tensor if your model has customized operators.)is_input_a_initializerr   r*   rg   r   r   r   
onnx_protoTensorProtoFLOATFLOAT16r   r   r   rZ   is_float_tensor)rD   r   rM   s   && rP   r   ONNXQuantizer.is_float_tensor  s   &&{3300==***!!+.Bww..2773F3F3P3P&&,,&&..U 4 OON{o]ghjhohogppqr ,,,;;..{;;3K? C6 7	
 rb   c                    V\         P                  P                  8X  d   V P                  WV4      # V\         P                  P                  8X  d   V P                  WV4      # \        RV R24      h)a\  
Create nodes for dynamic quantization of input and add them to nodes_list.
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    parameter qType: type to quantize to.
    parameter initial_type: type to quantize from
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
zUnexpected value for qType=r   )r   r   INT8+_get_dynamic_input_quantization_params_int8UINT8,_get_dynamic_input_quantization_params_uint8r:   )rD   
input_name
nodes_listqTypeinitial_types   &&&&&rP   &_get_dynamic_input_quantization_params4ONNXQuantizer._get_dynamic_input_quantization_params6  si     J**///CCJ\hiiJ**000DDZ]ijj6ugQ?@@rb   c                   \         P                  P                  pVR,           pVR,           p\        P                  P                  RV.VR,           .V^ R7      pVP                  V4       VR,           p\        P                  P                  RV.VR,           .V^ R7      p	VP                  V	4       VR,           p
\        P                  P                  R	VP                  ^ ,          .V
R,           .V
4      pVP                  V4       VR,           p\        P                  P                  R	V	P                  ^ ,          .VR,           .V4      pVP                  V4       VR
,           p\        P                  P                  RVP                  ^ ,          VP                  ^ ,          .VR,           .V4      pVP                  V4       \        P                  P                  V P                  V. \        V4      R,          .4      pV P                  P                  V4       VR,           p\        P                  P                  RVP                  ^ ,          V P                  .V.V4      pVP                  V4       \        P                  P                  V P                  V. ^ .4      pV P                  P                  V4       WPP                  . . 3# )aJ  
Create nodes for dynamic quantization of input to int8 and add them to nodes_list
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    parameter initial_type: initial weight type (FLOAT or FLOAT16)
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
_scale
_ReduceMin	ReduceMin:0keepdims
_ReduceMax	ReduceMax_AbsAbs_Abs_MaxMax       @	scale_DivDiv)r   r   r   rV   rW   rp   appendr,   make_tensorr>   r   r%   add_initializerr@   )rD   r   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodereduce_min_abs_namereduce_min_abs_nodereduce_max_abs_namereduce_max_abs_nodeabs_max_nameabs_max_nodeinitializer_divscale_div_namescale_div_nodeinitializer_zps   &&&&                rP   r   9ONNXQuantizer._get_dynamic_input_quantization_params_int8E  s    &&++ &0$|3++//Lt#$ 0 
 	/*$|3++//Lt#$ 0 
 	/* .6"kk33##A&' 4'(	
 	-.-6"kk33##A&' 4'(	
 	-.!J.{{,, ''*,?,F,Fq,IJD !	
 	,'++11''!%(3./	
 	

""?3#k1..  #T%@%@A	
 	.) 001H1H%QSVWUXY

"">2!8!8"b@@rb   c                   \         P                  P                  pVR,           pVR,           pVR,           p\        P                  P                  RV.VR,           .V^ R7      pVP                  V4       VR,           p	\        P                  P                  RV.V	R,           .V	^ R7      p
VP                  V
4       \        P                  P                  V P                  V. \        V4      .4      pV P                  P                  V4       \        P                  P                  V P                  V. R	.4      pV P                  P                  V4       VR
,           p\        P                  P                  RV
P                  ^ ,          VP                  ^ ,          .VR,           .V4      pVP                  V4       VR,           p\        P                  P                  RVP                  ^ ,          V P                  .V.V4      pVP                  V4       VR,           p\        P                  P                  RV P                  VP                  ^ ,          .VR,           .V4      pVP                  V4       VR,           p\        P                  P                  RVP                  ^ ,          V.VR,           .V4      pVP                  V4       VR,           p\        P                  P                  RVP                  VR,           .V4      pVP                  V4       VR,           p\        P                  P                  RVP                  V.VVR7      pVP                  V4       WV. . 3# )aK  
Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
    parameter input_name: Name of the input.
    parameter nodes_list: new nodes are appended to this list.
    parameter initial_type: initial weight type (FLAOT or FLOAT16)
    return: scale_name, zero_point_name, scale_shape, zero_point_shape.
r   _zero_pointr   r   r   r   r   r           
_scale_SubSub
_scale_Divr   _zero_point_Sub_zero_point_Div_zero_point_FloorFloor_zero_point_CastCast)to)r   r   r   rV   rW   rp   r   r   r=   r   r%   r   r?   r,   )rD   r   r   r   r   r   input_zp_namer   r   r   r   initializer_qrangeinitializer_qvaluescale_sub_namescale_sub_noder   r   zp_sub_namezp_sub_nodezp_div_namezp_div_nodezp_floor_namezp_floor_nodezp_cast_namezp_cast_nodes   &&&&                     rP   r   :ONNXQuantizer._get_dynamic_input_quantization_params_uint8  s4    &&,,%0"]2$|3++//Lt#$ 0 
 	/*$|3++//Lt#$ 0 
 	/* "[[44((!%()	
 	

""#56![[44T5I5I<Y[^a]bc

""#56 $l2..##A&(>(>q(ABd"#	
 	.)#l2..""1%t'C'CD	
 	.) !#44kk++!!?#9#9!#<=4 	
 	+& #44kk++"$454 	
 	+&"%88--g{7I7IM\`L`Kacpq-(!$66{{,,V]5I5IM?\hmr,s,'B66rb   c                   V P                   pVe   VEf}   V P                  e   WP                  9  d   \        P                  ! RV R24       R# V P                  V,          p\	        V\
        4      '       g   \        R\        V4       RV: R24      hVe   \        V4      ^8w  d   \        RV RV 24      h\        P                  ! VR,          .4      p\        VR	,          R
4      '       d7   VR	,          P                  \        P                  \        P                  39  d#   \        R\        VR	,          4       RV: 24      h\        P                  ! VR	,          .4      pVP                  \        P                   8w  g   Q hVR,          pM\        P                  ! V.4      p\        P                  ! V.4      pV P                  V,          pR	V9   d%   VR	,          P                  pVP#                  V4      pVP                  \        P                   8w  g   Q h. p	VR,           p
. pVR,           p\$        P&                  P)                  WWP+                  4       P-                  4       4      pV P.                  P1                  V4       VP                  \        P                  8X  d   \2        P4                  P6                  pMVVP                  \        P                  8X  d   \2        P4                  P8                  pM\        RVP                   RV: 24      h\$        P&                  P)                  WWP;                  R4      P-                  4       4      pV P.                  P1                  V4       RWW3# )a4  
Create initializers and inputs in the graph for zero point and scale of output.
Zero point and scale values are obtained from self.quantization_params if specified.
    parameter param_name: Name of the quantization parameter.
    return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
z$Quantization parameters for tensor:"z" not specifiedUnexpected type  for r   zbQuantization parameters should contain zero point, scale, quant type. Specified values for output z: 
zero_pointscaledtypez and param_name=
quant_typer   r   zUnexpected dtype=z for param_name=T)F r  r  r  ))rH   r<   r   r   
isinstancer   	TypeErrorrg   rk   r:   nparrayhasattrr  float32float16float64astyperV   rW   r   raveltolistr%   r   r   r   r   r   reshape)rD   
param_name	use_scaleuse_zeropointzero_point_typeparamszero_point_valuesscale_valuesr  zero_point_shapezero_point_namescale_shape
scale_nameinit_zp
scale_type
init_scales   &&&&            rP   _get_quantization_params&ONNXQuantizer._get_quantization_params  s    // 5''/:E]E]3]CJ<_`,,--j9Ff&899"24<.j^ST UVV~V!1 33=,bJ 
 !#&*>)? @6'?G44w8M8MVXV`V`bdblblUm8m #3D4I3JJZ[eZh!ijj88VG_$56L%%333$\2O "- 988YK0L--j9F& w--+2259%%333$}4(*
 ++)).>@W@W@Y@`@`@b
 	

""7++#//55J2::-#//77J01C1C0DDTU_Tbcdd[[,,Z[RfRfglRmRtRtRvw


"":.Z+OOrb   c           	        VP                   V,          pVR8w  g   Q R4       hV\        ,           pVR,           p	Ve
   Ve   RYErp
MV P                  V4      w  rp p. pV
'       d'   \        P                  P                  RW{V.V.V	4      pMV P                  '       d   R# V P                  '       dX   V\        P                  P                  8X  d9   VR,           pVR,           p\        P                  P                  R	V.WV.V	4      pMUVf   Q R
V: RV RV RV 24       hV P                  W~W6R7      w  pppp\        P                  P                  RW{V.V.V	4      p\        WxWV4      V P                  V&   . VOVN# )ar  
Given an input for a node (which is not a initializer), this function

- add nodes to compute zero point and scale for this input if they don't exist.
- add new QuantizeLinear node to quantize the input.

:param node: node being quantized in NodeProto format.
:param input_index: index of input in node.input.
:param qType: type to quantize to.
:param given_scale_name: if those inputs need to be quanitzed using this scale tensor.
:param given_zp_name: if those inputs to be quantized using this zeropoint tensor.
:param initial_type: type of the weight to quantize
:return: List of newly created nodes in NodeProto format.
r  z*Cannot access undefined variable in graph._QuantizeLinearNTr{   r   r   DynamicQuantizeLinearzCCannot quantize input without knowing the initial type, input_name=z, input_index=z, qType=z, node=r   )r-   r   r.  rV   rW   rp   r/   r1   r   r   r   r   r
   rA   )rD   r7   input_indexr   given_scale_namegiven_zp_namer   r   r   ql_node_name
data_foundr*  zp_namer   r   qlinear_noder)  zp_shapes   &&&&&&&           rP   _get_quantize_input_nodes'ONNXQuantizer._get_quantize_input_nodes2  s   " ZZ,
RM!MM #;;!$55(}/H/35EGJG484Q4QR\4]1JGQ;;00 1	L {{{ &&&5J4J4J4P4P+P'(2
$}4#{{44+L g6 	  $/ "",~k](SXRYY`ae`fh/ ??
SX?t#{{44$W5 M 	  0>jWalq/r  ,%%%%rb   c                    WP                   9   d   V P                   V,          # V P                  e   V P                  P                  V4      # R # r   )rA   rZ   find_quantized_value)rD   r   s   &&rP   r?  "ONNXQuantizer.find_quantized_valuex  sA    111++J77;;";;33J??rb   c
                f   \         P                  ! V4      p
VRV
,          ,          V,          p\         P                  ! VP                  4       \         P                  R7      p\         P                  ! VP                  4       \         P                  R7      pW,          pW8  d   VR8  d   W,          pW,          pV	f;   \
        P                  ! RV RV RV R24       R\         P                  ! VVR7      3# \
        P                  ! R	V	 R
V RV RV R2	4       RVP                  V4      3# RV3# )zHAdjust a single weight scale to ensure the int32 bias does not overflow.r   r  r   zIncreasing scale for weight `z` by the ratio z to ensure bias `z` has a valid scale.TzIncreased scale[z] for weight `z` by ratio F)r  absr  itemr  r   r   r  )rD   bias_valinput_scaleweight_scaleweight_scale_dtypeweight_name	bias_nameqrangemultiplicative_epsilonidxabsmaxbias_smallest_valid_scaleinput_scale_fp64weight_scale_fp64bias_candidate_scaleratio	new_scales   &&&&&&&&&&       rP   $adjust_single_weight_scale_if_needed2ONNXQuantizer.adjust_single_weight_scale_if_needed  s/    !$:cFl$Kf$T!88K$4$4$6bjjIHH\%6%6%8

K/C <CWZ]C]-DE)1I{3K=PUw W$$-;.BD RXXi7IJJJ&se>+kRWQX Y''0k1EG Y--.@AAAl""rb   c                   < V ^8  d   QhRS[ P                  RS[ P                  RS[RS[P                  RS[RS[S[S[ P                  R,          3,          /# )   rF  rG  rI  bias_tpis_per_channelreturnN)r  ndarrayr   rV   r   booltuple)format__classdict__s   "rP   __annotate__ONNXQuantizer.__annotate__  sh     6% 6%ZZ6% jj6% 	6%
 !!6% 6% 
tRZZ$&&	'6%rb   c                   VP                   '       g   R# \        V4      p\        P                  ! \        P                  4      pRp\        P
                  ! VP                  \        P                  R7      \        P
                  ! VP                  ^,           \        P                  R7      ,
          p	VP                  p
RpV'       Eg   \        P                  ! VP                  4       \        P
                  ! ^ \        P                  R7      4      p\        P                  ! VP                  4       \        P
                  ! ^ \        P                  R7      4      p\        P                  ! \        P                  ! V4      \        P                  ! V4      4      pV P                  VVVV
VVP                  V	V4      w  ppV'       d   TpRpW3# VP                  '       d   \!        VP                  4      ^8X  dj   \#        VP                  ^ ,          4       FI  pV P                  VV,          VVV,          V
VVP                  V	VVR7	      w  ppV'       g   KB  VVV&   RpKK  	  W3# )zOChecks if the bias scale is too small and increases the weight scale if needed.Fgqh ?rB  T)rM  )FN)sizer   r  iinfoint32r  maxr  minr  minimummaximumrC  rU  r)   shaperk   r   )rD   rF  rG  rI  rY  rZ  bias_float_data
int32_inforL  rK  rH  updatedrminrmaxrN  changedrT  r   s   &&&&&&            rP   #_adjust_weight_scale_for_int32_bias1ONNXQuantizer._adjust_weight_scale_for_int32_bias  s       /8XXbhh'
!'*..

;bhhz~~XYGYacakak>ll)//~::o113RXXarzz5RSD::o113RXXarzz5RSDZZtbffTl;F!%!J!J"&	"GY ($ $$# C(:(:$;q$@<--a01%)%N%N#A& O&LL* &O 
&" 7&/LO"G 2  $$rb   c                >   < V ^8  d   QhRS[ RS[P                  RR/# )rX  rI  rT  r[  N)r   r  r\  )r_  r`  s   "rP   ra  rb    s&     $1 $1c $1bjj $1T $1rb   c           	     J   WP                   9  d   R# V P                   V,          p\        WP                  P                  4       4      p\        VP                  V P                  P                  4       4      p\        VP
                  V P                  P                  4       4      p\        VP                  V P                  P                  4       4      pVe   Ve	   Ve   Vf   R# V P                  P                  V4       V P                  P                  V4       \        P                  P                  V4      pVP                  p	\        P                  ! V\        P                  P                  VP                   4      R7      p
\        P                  P#                  V
P%                  VP&                  4      VP                  4      pV P                  P)                  V4       \+        VV P,                  VV
V	VP                  R7      pV P                  P)                  V4       R# )zCRe-quantizes the given weight initializer using the provided scale.NrB  )quant_weight_name)rA   r   r%   r   r*  r9  q_nameremove_initializerrV   numpy_helperto_arrayaxisr  asarrayrW   tensor_dtype_to_np_dtyper   
from_arrayr  dimsr   r   rG   )rD   rI  rT  qv	weight_tp
scale_initzp_initq_weight_initweight_zero_pointr{  scale_npnew_scale_initnew_q_weights   &&&          rP   _requantize_weight ONNXQuantizer._requantize_weight  s    666%%k2 jj.D.D.FG	!"--1G1G1IJ
rzz4::+A+A+CD$RYY

0F0F0HI
 2goI^

%%j1

%%m4 --66w?ww ::it{{/S/ST]TgTg/hi**55h6F6Fz6WY[YfYfg

"">2 1 ii
 	

""<0rb   c                2   WP                   9   d   V P                   V,          P                  # V P                   V,          P                  p\        WPP                  P                  4       4      p\        V4      pW P                   9   d   V P                   V,          P                  pM6W P                  9   d   V P                  V4      w  r  p	M\        RV R24      h\        WP                  P                  4       4      p
\        V
4      pV P                   V,          P                  p\        WP                  P                  4       4      pVe    \        P                  P                  V4      MRpV P                  pVe   VP                  '       d   VP!                  4       '       g   V P"                  \$        P&                  P(                  39   dX   \        WP                  P                  4       4      pV P+                  VVVVV4      w  ppV'       d   V P-                  VV4       TpV P/                  WWt4      w  ppppppWP                   9  g   Q h\1        TTTT\2        P4                  VP                  ^8  d   ^ MRVVR7      pVV P                   V&   V# )zM
Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
z	Expected z5 to be in quantized value map for static quantizationN)	node_type
node_qtype)rA   rw  r*  r   r%   r   r   r<   r.  r:   r9  rV   ry  rz  rE   rd  r   rG   r   r   r   rr  r  quantize_bias_static_implr
   r   Initializer)rD   rJ  r   rI  betaweight_scale_nameweight_initializerrG  r   r   inputscale_initializerrF  weight_zp_nameweight_zp_initr  rZ  bias_initializer
did_updatenew_weight_scalequantized_bias_namequantized_bias_scale_namequantized_bias_zp_namebias_scale_datar  r  quantized_values   &&&&&                     rP   quantize_bias_static"ONNXQuantizer.quantize_bias_static  sb    000++I6=== !44[ALL)*;ZZ=S=S=UV,-?@ 111#77
CNN333+/+H+H+T(AAqy4ijkk!-.>

@V@V@X!Y+,BC 11+>FF%njj6L6L6NOJXJdD--66~Fjn)))!&&&%))++!!j&<&<&A&A%CC+Izz7M7M7OP+/+S+S ,(J( ''5EF/ **9<V	
%"  8 8888(%"** %%)At!	
 />  +""rb   c                v    WP                   9   ;'       g%    WP                  9   ;'       g    WP                  9   # )za
only check for value info and newly generated tensor names, initializers are checked separately
)r*   r6   rC   r   s   &&rP   contains_tensorONNXQuantizer.contains_tensorJ  sA    
 ,,, ; ;000; ;999	
rb   c           
     2    V P                  VVR R R RVR7      # )Fr7   indicesinitializer_use_weight_qTyperF   op_level_per_channelr{  from_subgraphr  _ONNXQuantizer__quantize_inputs)rD   r7   r  r  s   &&&&rP   quantize_activation!ONNXQuantizer.quantize_activationT  s/    %%).!&' & 
 	
rb   c           
     2    V P                  VVR VVVVR7      # )Tr  r  )rD   r7   r  rF   r  r{  r  s   &&&&&&&rP   quantize_weightONNXQuantizer.quantize_weighta  s1     %%)-%!5' & 
 	
rb   c                	   . p. p	. p
. pV EF  pVP                   V,          pWP                  9   dg   V P                  V,          pVP                  VP                  4       V	P                  VP                  4       V
P                  VP
                  4       K  V'       g6   V
P                  R4       VP                  R4       V	P                  R4       K  \        WP                  P                  4       4      pVe   V P                  '       dJ   V'       dB   T P                  VP                  V'       d   V P                  MV P                  VV4      w  pppM5T P                  TV'       d   V P                  MV P                  V4      w  pppV
P                  V4       V	P                  V4       VP                  V4       EK  V P                  V4      '       EdX   V P                  P!                  VR,           V P"                  V P                  P%                  4       4      pVEf1   VP                   V,          pVV P&                  9   d   V P&                  V,          pVP)                  R4      '       g   Q RV R24       hVP*                  P)                  R4      '       g   Q RV R24       hVP*                  P,                  P.                  pM0VV P0                  9   g   Q RV: R	24       hV P0                  V,          pV P3                  WV P                  VR
7      pVf   Ru # V'       d   V P5                  V4       MVP7                  V4       VR,          pVP8                  R8X  dc   V
P7                  VP:                  4       VP                  VP                   ^,          4       V	P                  VP                   ^,          4       EK  V
P                  VP:                  ^ ,          4       VP                  VP:                  ^,          4       V	P                  VP:                  ^,          4       EK&  V P<                  et   V P<                  P?                  VV.VVVVRR7      w  ppppV
P                  V^ ,          4       VP                  V^ ,          4       V	P                  V^ ,          4       EK  \A        RV RV PB                   24      h	  WW3# )aC  
Given a node, this function quantizes the inputs as follows:
    - If input is an initializer, quantize the initializer data, replace old initializer
      with new initializer
    - Else, add QuantizeLinear nodes to perform quantization
    parameter node: node being quantized in NodeProto format.
    parameter indices: input indices to quantize.
    return: (List of quantized input names,
             List of zero point names used for input quantization,
             List of scale names used for input quantization,
             List of new QuantizeLinear nodes created)
r  r1  rg   zvalue_info=z has no type.r   z is not a tensor.zshape inference failed for zF and attribute 'tensor_names' does not have any value for this tensor.r3  r{   T)r  rF   r  r{  r  z!Invalid tensor name to quantize: z @graph scope)NNNNr  )"r-   rA   r   r*  r9  rw  r   r%   r   rE   quantize_weight_per_channelr)   rG   rH   quantize_initializerr  find_node_by_namer4   r'   r*   r   rg   r   r   r6   r<  r   ro   rl   r,   rZ   r  r:   r5   )rD   r7   r  r  rF   r  r{  r  scale_nameszero_point_namesquantized_input_namesr   r4  
node_inputr  r   q_weight_namer9  r*  r:  r   r(   r   quantize_input_nodesparent_quantized_input_namesparent_zero_point_namesparent_scale_namesr   s   &&&&&&&&                    rP   __quantize_inputsONNXQuantizer.__quantize_inputst  s_   .  ""KK0J 555"&":"::"F""?#=#=> ''(?(?@%,,_-C-CD%,,R0""2& ''+&z::3I3I3KLK&###(<
 88#((-I))tOdOd$		%" :>9R9R#-I))tOdOd$:6M7J &,,]; ''0"":.%%j11#zz;;!22DNNDJJDTDTDV   '!%K!8J!T%5%55%)%5%5j%A
)226::ck*Ub<cc:)77FFs+V`UaarHssF'1'B'B'L'L  *T->->> 9* H+ ,>
 (,'8'8'D+/+I+I4+@+@| ,J ,( ,377$**+?@%9:#7#;L''+;;)001D1DE&&|'9'9!'<=$++L,>,>q,AB)001D1DQ1GH&&|':':1'=>$++L,?,?,BC( KK11 M1M!-)="& 2 0+& &,,-I!-LM""#5a#89 ''(?(BC !#DZLP]^b^n^n]o!pqqG #J %JJrb   c                j   VP                   V P                  9   dA   V P                  VP                   ,          pVP                  VP                  VP                  3# V P                  WW44      w  rgp\        VP                   VVV\        P                  R4      pWPP                  VP                   &   WgV3# )aj  
:param weight: TensorProto initializer
:param qType: type to quantize to
:param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                          If keep_float_weight is False, quantize the weight, or don't quantize the weight.
:return: quantized weight name, zero point name, scale name
N)	r)   rA   rw  r9  r*  quantize_initializer_implr
   r   r  )	rD   r   r   rF   keep_float_weightr  r  r9  r*  s	   &&&&&    rP   r  "ONNXQuantizer.quantize_initializer  s     ;;$222"66v{{CO&&''**  .2-K-K<.
*

 )KK**
 1@  -z11rb   c                   WP                   9   d7   V P                   V,          pVP                  VP                  VP                  3# V P	                  WW4V4      w  rxp	\        VVV	V\        P                  R 4      pW`P                   V&   WxV	3# r   )rA   rw  r9  r*   quantize_weight_per_channel_implr
   r   r  )
rD   rI  rG   channel_axisrF   r  r  r  r9  r*  s
   &&&&&&    rP   r  )ONNXQuantizer.quantize_weight_per_channel  s     222"66{CO&&''**  .2-R-R|CT.
*
 )**
 1@  -z11rb   c                &   WP                   9   Ed   WP                  9  Edo   V P                   V,          p\        VP                  V P                  P                  4       4      pV P                  P                  P                  R8w  g*   V P                  P                  P                  R8X  d9   Ve5   Ve1   \        P                  P                  V4      P                  ^8X  g   Q hVR,           pV P                  P                  W@P                  V P                  P                  4       4      pVfH   VP                  VP                  VP                  .p\        P                   P#                  RWa.V4      pV# WP$                  ^ ,          8X  g   Q hR# )a~  
Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
it back to float32 or float16
    parameter value_name: value to dequantize
    parameter new_nodes_list: List of new nodes created before processing current node
    return: None if there is already a DequantizeLinear node that dequantizes it
            A DequantizeLinear node otherwise
rS   N_DequantizeLinearr|   )rA   rC   r   r*  r%   r   rT   rV   ry  rz  rd  r  r4   r'   rw  r9  rW   rp   r,   )rD   
value_namer  r  dqlinear_namedqlinear_nodedqlinear_inputsdequantize_nodes   &&      rP   _dequantize_valueONNXQuantizer._dequantize_value8  sR    222KeKe9e"66zBO &o&@&@$**BXBXBZ[J zz--1AA

  ..2BBzG] ")T->->-G-G
-S-X-X\]-]]]&)<<M JJ88X\XbXbXhXhXjkM$#**#..#++#
 #'++"7"7&}# '& "%9%9!%<<<<rb   c                    V P                   P                  4       P                   F?  pV P                  VP                  4      pVf   K$  V P
                  P                  V4       KA  	  R# )z
Dequantize output if it is quantized
    parameter new_nodes_list: List of new nodes created before processing current node
    return: List of new nodes created
N)r%   r'   r,   r  r)   r4   r   )rD   r,   r  s   &  rP   r   !ONNXQuantizer._dequantize_outputs_  sM     jj&&(//F"44V[[AO*%%o6 0rb   c           	        V P                   f   R # V P                  4        / pV P                    EF  pV P                   V,          p\        V\        4      '       g   \	        R\        V4       RV: R24      hV P                  P                  V/ R7      pV P                  pRV9   d   VR,          P                  pRV9   d   RV9   d   VR,          VR,          rvMV\        P                  P                  8X  d    \        WSP                  ^,          4      w  rgMVP                  RVP                   ^ ,          4      pVP                  R	VP                   ^,          4      p	VP                  R
V P"                  4      p
VP                  RR4      p\%        W[V
R7      w  r\'        WWWP(                  4      w  rg\+        WgVR7      W&   EK  	  V# )Nr  r  r   )default_valr  r  r  ro  rp  	symmetricrF   F)rF   r  )r  r  r  )rI   adjust_tensor_rangesr  r   r  rg   tensor_quant_overridesget_per_tensor_overridesrH   r   rV   r   FLOAT8E4M3FNr   avg_stdgetrange_valueis_activation_symmetricr   r   min_real_ranger   )rD   r<   r   tdquant_overridesr  zeror  ro  rp  r  rF   qminqmaxs   &             rP   r;   +ONNXQuantizer.calculate_quantization_paramsk  s   %!!# --K##K0Bb*--"248*E+PQ RSS"99RRS^lnRoO..J.,\:FF
/)lo.M-l;_W=Uet//<<<5j**Q-Pe&**62>>!3DE&**62>>!3DE+//T=Y=YZ	.22>5I4Zfop
.t4yReRef/ATku/v,/ .2 #"rb   )r>   r=   r?   r@   r1   rC   r5   r.   r%   r4   r3   r<   rA   r/   r6   r*   r   )F)NN)NNN)g      ?)FFr  F)TFFr  F)FF)TF)#__name__
__module____qualname____firstlineno__r$   r`   rw   r   r   r   r[   r   r   r   r   r   r   r.  r<  r?  rU  rr  r  r  r  r  r  r  r  r  r  r   r;   __static_attributes____classdictcell__)r`  s   @rP   r   r   &   s     FMR/> fD
<+ Z
"22ARAh\7|9PvD&L##J6% 6%p$1 $1LF#P
	

&AKF2B2@%N
7 #  #rb   r   )#r   numpyr  rV   onnx.numpy_helperr   r   base_quantizerr   r   	calibrater   
onnx_modelr   quant_utilsr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r    rb   rP   <module>r     sK        & = ! !    & (e#M e#rb   