+
    9i                       ^ RI Ht ^ RIt^ RIt^ RIt^ RIt^ RIHt ^ RIH	t	 ^ RI
t
^ RIt^ RIHtHtHt ^ RIHtHtHt ^ RIHt ^ RIHtHtHtHt ^ R	IHt ^ R
IHtHtHt  ^ RI H!t! Rt#Rt$Rt%Rt&Rt'Rt(Rt)Rt*Rt+Rt,/ t-].! ]4       U u/ uF%  p ]/! ]0! ]V 4      ]14      '       g   K  ]0! ]V 4      V bK'  	  up t2 ! R R]4      t3 ! R R]4      t4 ! R R]4      t5 ! R R]4      t6]P"                  Pn                  ]
Pp                  ! R4      ]P"                  Pr                  ]
Pp                  ! R4      ]P"                  Pt                  ]
Pp                  ! R 4      ]P"                  Pv                  ]
Pp                  ! R!4      ]P"                  Px                  ]]P"                  Pz                  ]]P"                  P|                  ]/t?]P"                  Pr                  ]
P                  ! ^ ]
P                  R"7      ]
P                  ! ^]
P                  R"7      3]P"                  Pn                  ]
P                  ! Rh]
P                  R"7      ]
P                  ! ^]
P                  R"7      3]P"                  Pv                  ]
P                  ! ^ ]
P                  R"7      ]
P                  ! R#]
P                  R"7      3]P"                  Pt                  ]
P                  ! Ri]
P                  R"7      ]
P                  ! R$]
P                  R"7      3]P"                  P|                  ]
P                  ! ^ ]R"7      ]
P                  ! ^]R"7      3]P"                  Pz                  ]
P                  ! Rj]R"7      ]
P                  ! ^]R"7      3/tE]P"                  Pn                  ]
P                  ! Rk]
P                  R"7      ]
P                  ! ^]
P                  R"7      3]P"                  Pt                  ]
P                  ! Rl]
P                  R"7      ]
P                  ! R$]
P                  R"7      3/tF]P"                  Pr                  ]
P                  ! ^ ]
P                  R"7      ]
P                  ! ^]
P                  R"7      3]P"                  Pn                  ]
P                  ! Rm]
P                  R"7      ]
P                  ! ^@]
P                  R"7      3]P"                  Pv                  ]
P                  ! ^ ]
P                  R"7      ]
P                  ! R$]
P                  R"7      3]P"                  Pt                  ]
P                  ! Rn]
P                  R"7      ]
P                  ! R%]
P                  R"7      3]P"                  P|                  ]
P                  ! ^ ]R"7      ]
P                  ! ^]R"7      3]P"                  Pz                  ]
P                  ! Ro]R"7      ]
P                  ! ^]R"7      3/tGR&Rp/R' ltHRqR( ltIRrR) ltJR* tKRsR+ R, lltLRsR- R. lltMRqR/ R0 lltNRtR1 ltORtR2 ltPR3 R4 ltQR5 R6 ltR ! R7 R84      tS ! R9 R:4      tT ! R; R<4      tUR= tVR> tWR? tXR@ tYRA RB ltZRC t[RuRD lt\RvRE lt]RF RG lt^RH RI lt_RJ RK lt`RL RM ltaRN RO ltbRP RQ ltcRR RS ltdRT RU lteRV RW ltfRX RY ltgRZ R[ lthR\ R] ltiR^ R_ ltjR` Ra ltkRb Rc ltlRd Re ltmRf Rg ltnR#   ]" d    Rt! EL_i ; iu up i )w    )annotationsN)Enum)Path)float8_e4m3fnint4uint4)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptions)to_array_extendedzonnx.quantizez0.1.0ai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                  4    ] tR t^4t^ t^tR t]R 4       tRt	R# )QuantizationModec                	    V P                   # Nnameselfs   &b/var/www/html/photoedit/myenv/lib/python3.14/site-packages/onnxruntime/quantization/quant_utils.py__str__QuantizationMode.__str__8       yy    c                	P     \         V ,          #   \         d    \        4       hi ; ir   )r   KeyError
ValueError)modes   &r$   from_stringQuantizationMode.from_string;   s)    	#D)) 	,	    % N)
__name__
__module____qualname____firstlineno__
IntegerOps
QLinearOpsr%   staticmethodr-   __static_attributes__r0   r(   r$   r   r   4   s%    JJ  r(   r   c                  4    ] tR t^Ct^ t^tR t]R 4       tRt	R# )QuantizedValueTypec                	    V P                   # r   r    r"   s   &r$   r%   QuantizedValueType.__str__G   r'   r(   c                	P     \         V ,          #   \         d    \        4       hi ; ir   )r:   r*   r+   )vs   &r$   r-   QuantizedValueType.from_stringJ   s)    	%a(( 	,	r/   r0   N)
r1   r2   r3   r4   InputInitializerr%   r7   r-   r8   r0   r(   r$   r:   r:   C   s%    EK  r(   r:   c                  X    ] tR t^Rt^ t^t^t^t^t^t	^t
R t]R 4       t]R 4       tRtR# )	QuantTypec                	    V P                   # r   r    r"   s   &r$   r%   QuantType.__str__[   r'   r(   c                	P     \         V ,          #   \         d    \        4       hi ; ir   )rC   r*   r+   )ts   &r$   r-   QuantType.from_string^   s(    	Q< 	,	r/   c                	(   V \         P                  8X  d   \        P                  # V \         P                  8X  d   \        P
                  # V \         P                  8X  d   \        P                  # V \         P                  8X  d   \        P                  # V \         P                  8X  d   \        P                  # V \         P                  8X  d   \        P                  # V \         P                  8X  d   \        P                  # \!        R V : R24      h)zUnexpected value qtype=.)rC   QInt8r
   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r+   r"   s   &r$   tensor_typeQuantType.tensor_typee   s    9??"###9###$$$9$$$%%%9###$$$9***+++9###$$$9??"###24(!<==r(   r0   N)r1   r2   r3   r4   rK   rM   rS   rQ   rO   rW   rU   r%   r7   r-   propertyrY   r8   r0   r(   r$   rC   rC   R   sR    EFMFGEF   > >r(   rC   c                  4    ] tR t^xt^ t^tR t]R 4       tRt	R# )QuantFormatc                	    V P                   # r   r    r"   s   &r$   r%   QuantFormat.__str__|   r'   r(   c                	P     \         V ,          #   \         d    \        4       hi ; ir   )r]   r*   r+   )formats   &r$   r-   QuantFormat.from_string   s)    	v&& 	,	r/   r0   N)
r1   r2   r3   r4   	QOperatorQDQr%   r7   r-   r8   r0   r(   r$   r]   r]   x   s%    I
C  r(   r]   int8uint8int16uint16dtypei  i  i @  zero_point_indexc                   . p\        V4       EF	  w  r4\        P                  ! \        V4      \        P                  4      '       d'   VP                  \        P                  ! V4      4       MC\        V\        P                  4      '       d   VP                  V4       M\        R V RV 24      hW08X  g   K  VR,          pVP                  \        P                  8X  g"   VP                  \        P                  8X  g   K  \        RVP                   24      h	  \        V4      ^8  d   \        V4      # V^ ,          # )zarg z is not an array: zzero_point cannot be )	enumeratenumpy
issubdtypetypenumberappendarray
isinstancendarray	TypeErrorrj   float32float16lentuple)rk   argsnew_argsiar>   s   $*    r$   _check_typer      s    H$DGU\\22OOEKKN+5==))OOAd1#%7s;<< Aww%--'177emm+C"7y ABB   "(ma/5?@Xa[@r(   c                   V \         9   g   Q R V  R24       hV \        P                  P                  \        P                  P                  \        P                  P
                  \        P                  P                  39   Ed<   V^ 8w  d   \        RV: R24      hVP                  \        P                  8X  d   \        P                  pMIVP                  \        P                  8X  d   \        P                  pM\        RVP                   R24      h\        \!        \#        R. R.\$        P&                  P)                  RV . ^ .4      R7      \#        R. ROR.4      .R\+        R	VR4      \+        R
VR4      .\+        RV R4      .4      4      p\-        V4      p\/        VP1                  RR	VR
V/4      ^ ,          4      # \         V ,          p	\3        V RRR7      w  rVe   \5        W4      MT
pVe   \7        W4      MTp\        P8                  ! VP;                  \        P                  4      V,          P=                  4       V,           4      p\        P>                  ! WWR7       \/        VP;                  V	4      4      # )Unexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.z2zero_point is expected to be null for float 8 not rJ   zUnexpected dtype Constant
zero_point)valuer   XscaleYquNF)reduce_range	symmetric)out)r   r   r   ) ONNX_TYPE_TO_NP_TYPE
onnx_protor
   rT   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorrj   ro   rx   FLOATry   FLOAT16r+   r   r   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefrj   qminqmaxcliplowcliphigharr_fp32s   &&&&&&         r$   quantize_nparrayr      s'   (( 
w&de( ++--))--	  ?%(Z[eZhhi&jkk99%#))IYY%--'#++I01=>>"Bdkk>U>UVbdikmpqor>s .0LseT	 *3	4@*7ItD (UD9:

  !,3774#sGU)CDQGHH %U+,URWX
$'O#d.&*&63t?D==#**U]]";e"C!J!J!Lz!YZ

8h=8??5122r(   c           
     "   V^ 8  g   V^ 8  d   \        RV RV 24      h\        P                  ! V \        P                  ! ^ V P                  R7      4      p \        P
                  ! V\        P                  ! ^ VP                  R7      4      pVe2   \        W\        P                  ! WPP                  R7      ,           4      pV'       dF   \        P
                  ! \        P                  ! V 4      \        P                  ! V4      4      pV) p V5pW#8:  g   Q RV  RV 24       h\        P                  ! W,
          \        P                  R7      p\        P                  ! V\        P                  R7      \        P                  ! V\        P                  R7      ,
          p\        P                  ! Wx,          4      p	V	^ 8  g   Q R4       hV	\        P                  ! VP                  4      P                  8  dH   \        P                  ! RVP                  R7      p	\        P                  ! ^ VP                  R7      p
W.# V'       di   \        P                  ! \        P                  ! W#,           \        P                  ! R\        P                  R7      ,          4      VP                  R7      p
MC\        P                  ! \        P                  ! W V	,          ,
          4      VP                  R7      p
V	P                  VP                  4      p	W.# )	a  Calculate the scale s and zero point z for the quantization relation
r = s(q-z), where r are the original values and q are the corresponding
quantized values.

r and z are calculated such that every value within [rmin,rmax] has an
approximate representation within [qmin,qmax]. In addition, qmin <= z <=
qmax is enforced. If the symmetric flag is set to True, the interval
[rmin,rmax] is symmetrized to [-absmax, +absmax], where
absmax = max(abs(rmin), abs(rmax)).

:parameter rmin: minimum value of r
:parameter rmax: maximum value of r
:parameter qmin: minimum value representable by the target quantization data type
:parameter qmax: maximum value representable by the target quantization data type
:parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:return: zero and scale [z, s]

Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:ri   zqmin=z > qmax=zscale issue      ?g       @)r+   ro   minimumrt   rj   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s   &&&&&&     r$   compute_scale_zpr      s   ( ax4!8]^b]ccklpkqrss
 ==u{{1DJJ?@D==u{{1DJJ?@D !4nJJ OOPuyy		$@ww<55htf55<	T[	6B	T	/%++d%--2X	XBKK EA:$}$:u{{4::&+++Ctzz2[[$**5
   T[EKK5==,QQRZ^ZdZdJ U[[u1D%ETZZXJTZZ(r(   c                   RpV \         9  d   V \        P                  8X  d   ^ RIHp Tp\        R4       Uu. uF  p\        V4      NK  	  pp\        P                  ! V Uu. uFA  p\        P                  ! V4      '       d   K!  \        P                  ! V4      '       d   K?  VNKC  	  up\        P                  R7      pM\        RV  R24      hV\         V &   MV \        P                  8X  d	   ^ RIHp TpVf   \        RV  R24      h\        P                  ! \         V ,          4      p\        P                  ! ^ VR7      p	\        P                  ! W,          VP                  R7      p
W.# u upi u upi )	aZ  Calculate the scale s for a float8 type (E4M3FN).
The function assumes the coefficient distribution and the float 8
distribution are similar to two gaussian laws.

:return: zero and scale [z, s]

More details in notebook `quantization_fp8.ipynb
<https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
N)r      ri   zQuantization to element_type=z not implemented.zUnexpected element_type rJ   )FLOAT8_DISTRIBUTIONSr
   rT   	ml_dtypesr   rangefloatro   rt   isnanisinfrx   r+   rw   stdrj   )element_typer   zp_dtyper   r~   
all_valuesfvaluesstd_f8zeror   s   &&         r$   compute_scale_zp_float8r   ,  s$    H//;333/$H,1#J7Jq%(JJ7[[&TJqekk!nU[[QR^JT\a\i\iF <\NJ[\]]-3\*	11	1+ 2<.BCCYY+L9:F;;q)DKKCII6E=# 8Ts   E."E3E3#E3c               8    V ^8  d   QhRRRRRRRRRR	R
R	RR	RR/# )   datanumpy.ndarray
quant_typeonnx.TensorProto.DataTyper   boolr   r   zfloat | Nonermin_overridermax_overridereturnz#tuple[numpy.ndarray, numpy.ndarray]r0   )ra   s   "r$   __annotate__r   P  sb     ;G ;G
;G);G ;G 	;G
 !;G  ;G  ;G );Gr(   c                0   \        V \        P                  4      '       g   \        R\	        V 4       R24      hVe   TpM#\        V 4      '       d   V P                  4       MRpVe   TpM#\        V 4      '       d   V P                  4       MRp\        P                  ! WpP                  R7      p\        P                  ! WP                  R7      p\        P                  ! RV P                  R7      p	V\        P                  8X  dD   V'       d   \        R4      h\        P                  ! V 4      p
\        W4      w  r\        W^ R7      # V\        P                   \        P"                  \        P$                  \        P&                  \        P(                  \        P*                  39   d`   \-        WVR7      w  r\        V 4      '       d   \/        WxWW$4      w  rM"\        P                  ! ^ VP                  R7      p\        W^ R7      # \1        R	V R24      h)
a  
Returns the zero_point and scale for the given data.

:param data: The data for which to compute quantization parameters.
:param quant_type: The quantization data type.
:param symmetric: whether symmetric quantization is used or not.
:parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
:parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
:return: zero point and scale
z%Weight must be given as an array not rJ   g        ri   r   z1Unsupported option reduce_range=True for float 8.)rk   r   z Unexpected value for quant_type=)ru   ro   rv   rw   rq   rz   r   r   rt   rj   r
   rT   RuntimeErrorr   r   r   rL   rN   rR   rP   rX   rV   r   r   r+   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   s   &&&&&&&       r$   compute_data_quant_paramsr   P  s   * dEMM**?T
|1MNN  YYtxxzC  YYtxxzC;;t::.D;;t::.DKK4::.E[---RSSiio3JD
:qAA  -ZQZ[
t99 0T cJQdjj9J:qAA
7
|1E
FFr(   c                   V ^8  d   QhRR/# )r   r   z2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]r0   )ra   s   "r$   r   r     s     := :=7:=r(   c                   \        V VVVVVV4      w  rxV\        P                  8X  d   \        WW4      p	\	        V	P                  \        P                  4      P                  4       ^,          ^8H  4      '       dg   \        P                  ! V 4      p
\        RV
P                  4        RV
P                  4        RV	P                  4        RV	P                  4        R2	4      hWxV	3# V\        P                  \        P                  \        P                  \        P                   \        P"                  \        P$                  39   d   \        WW4      p	WxV	3# \'        RV R24      h)a   
:param data: data to quantize
:param qType: data type to quantize to.
:param symmetric: whether symmetric quantization is used or not.
:parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
:parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
:parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
:parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
:return: minimum, maximum, zero point, scale, and quantized weights

To pack weights, we compute a linear transformation

- when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
- when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
    `m = max(abs(rmin), abs(rmax))`

and add necessary intermediate nodes to transform quantized weight to full weight using the equation

:math:`r = S(q-z)`, where

- *r*: real original value
- *q*: quantized value
- *S*: scale
- *z*: zero point
z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rJ   )r   r
   rT   r   anyviewro   rf   ravelr   r   r   r   rL   rN   rR   rP   rX   rV   r+   )r   r   r   r   r   r   r   r   r   quantized_datanp_datas   &&&&&&&    r$   quantize_datar     sO   8 2J ((()%uI##EKK06683>3FGGmmD)G=gkkm_Bw{{}o ^&&4&8&8&:%;2n>P>P>R=SSUW  .00  *%uI.00
25';
<<r(   c               4    V ^8  d   QhRRRRRRRRRR	R
RRR/# )r   weightzonnx.TensorProtor   r   r   r   r   axisz
int | Nonequant_weight_namez
str | Noner   r0   )ra   s   "r$   r   r     sW     L  L L )L  L  	L 
 L  "L  L r(   c                   \        V 4      pRpVf   \        WP                  4       W24      pMVP                  V,          p\	        VP                  4      p	^W&   . p
\        V4       Fr  pVP                  W4      pW;,          pW+,          p\        WP                  4       W4      pV
P                  \        P                  ! V4      P                  V	4      4       Kt  	  \        P                  ! W4      pV'       d   TMV P                   \         2pV\        P                  P                   8X  Ed6   \        P                  ! 4       pVVn        VP$                  P'                  V P$                  4       VVn        VP)                  4       P+                  4       P-                  4       Vn        \0        e   \1        V4      pVP                  VP                  8w  g$   VP-                  4       VP-                  4       8w  dj   \3        RVP                   RVP-                  4       R,           RVP-                  4       R,           RV P                   R\5        V4      R,           R	24      hV# V\        P                  P6                  \        P                  P8                  39   d|   VP:                  \<        \>        39  d   \3        R
V R24      h\A        \C        VP-                  4       4      4      p\        PD                  PG                  VWP$                  VRR7      pV# \        PD                  PI                  V4      p\        P                  ! VVR7      P                  V P$                  4      p\        PJ                  PM                  VV4      pV# )a  
Returns a quantized version of the given ONNX initializer.

:param weight: The ONNX initializer to quantize.
:param quant_type: The final quantized data type.
:param zero_point: The zero-point value to use for quantization.
:param scale: The scale value to use for quantization.
:param axis: The quantization axis if quantizing per-channel. Defaults to None.
:param quant_weight_name: The name of the quantized initializer.
                          If not specified, the quantized name is generated.
:return: The quantized ONNX initializer.
NzThe initializer of shape z! could not be created, expecting :N
   Nz, got z and shape=z
raw=:N   NrJ   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawri   )'tensor_proto_to_arrayr   r   shapelistr   takers   ro   r   reshapeconcatenater!   TENSOR_NAME_QUANT_SUFFIXr   r
   rT   	data_typedimsextendflattencopytobytesraw_datar   r   strrX   rV   rj   r   r   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)r   r   r   r   r   r   weight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr~   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtypes   &&&&&&               r$   quantize_onnx_initializerr    s   ( (/K*.M|(5F5F5H%\#))$/K--.&(#}%A&++A4L!HM!+%5..0-&" (..u}}=S/T/\/\]i/jk & ))*EL):%6;;-PhOi@jMT%%222#//1)3&!!((5$1!(5(=(=(?(D(D(F(N(N(P%( &&:;E{{k///5==?mF[F[F]3]"/0A0A/BBc$,,.s34F5==?3;O:PP[\b\h\h[iS!56t<=Q@ (   
((--t/?/?/E/EF	FtUm3!7Ftuvv .}/D/D/FGH  ${{66}jR]R]_jpt6u  	 ==jIm>JRRSYS^S^_#00;;M=Yr(   c                   V \         P                  P                  8X  d   \        R4      hRpV'       d   \        P                  V 4      pM6V'       d   V \        9   d   \        V ,          pM\        P                  V 4      pV'       g   \        RV  R24      hVw  rEV^ 8  g   V^ 8  d(   \        RV RV RVP                   RV R	V R
V  24      hV# )z
Return qmin and qmax, the minimum and maximum value representable by the given qType
:parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
:return: qmin, qmax
z;This function is not implemented for float 8 as not needed.Nr   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r
   rT   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr+   rj   )r   r   r   qranger   r   s   &&&   r$   r   r     s     
&&333!"_``F,007	u ==.u5$((/07uvwwJDax4!86$x

|?<. Y"8E74
 	
 Mr(   c                0    \        WVR7      w  r4WC,
          # )z
Helper function to get the quantization range for a type.
    parameter qType: quantization type.
    return: quantization range.
r   )r   )r   r   r   r   r   s   &&&  r$   get_qrange_for_qTyper  :  s     )	RJD;r(   c               $    V ^8  d   QhRRRRRR/# )r   r   intrankr   ztuple[bool, int]r0   )ra   s   "r$   r   r   D  s"     	 	 	C 	,< 	r(   c                L    V ^ 8  d	   W,           MT pV^ 8  ;'       d    W!8  pW23# )z
Helper function that tries to return a normalized axis in the range [0, rank - 1].
:parameter axis: The axis to normalize.
:parameter rank: The tensor rank (number of dimensions).
:return (is_valid, axis_norm)
r0   )r   r  	axis_normis_valids   &&  r$   normalize_axisr   D  s0      $axTIA~22)"2Hr(   c                    V ^8  d   QhRRRR/# )r   src_8bitr   r   	bytearrayr0   )ra   s   "r$   r   r   P  s       9 r(   c                R   \        V 4      pV^ 8X  d   \        4       # V^,           ^,          p\        V4      p^ p^ pWA^,
          8  dH   W^,           ,          ^,          ^,          W,          ^,          ,          W5&   V^,          pV^,          pKT  WA8  d   W,          ^,          W5&   V# )a.  
Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
Assumes that the source values are already in the appropriate int4 range.
:parameter src_8bit: The 8-bit element values to pack.
:return A bytearray with every two 8-bit src elements packed into a single byte.
)rz   r#  )r"  	num_elemsdst_sizedstsrc_idst_is   &     r$   r   r   P  s     HIA~{A!#H
H
CEE a-
	*S0Q68?S;PQ


_s*
Jr(   c                  *    ] tR tRtRt. . R3R ltRtR# )QuantizedInitializerin  zB
Represents a linearly quantized weight input from ONNX operators
Nc
                	r    Wn         W n        W0n        W@n        WPn        W`n        Wpn        Wn        Wn        R # r   )	r!   initializerrminsrmaxszero_pointsscalesr   r   r   )
r#   r!   r-  r.  r/  r0  r1  r   r   r   s
   &&&&&&&&&&r$   __init__QuantizedInitializer.__init__s  s4     	&

&	,	r(   )	r   r   r-  r!   r   r/  r.  r1  r0  r1   r2   r3   r4   __doc__r2  r8   r0   r(   r$   r+  r+  n  s      r(   r+  c                  $    ] tR tRtRtRR ltRtR# )QuantizedValuei  zA
Represents a linearly quantized value (input\output\intializer)
Nc
                	r    Wn         W n        W0n        W@n        WPn        W`n        Wpn        Wn        Wn        R # r   )	original_nameq_name
scale_namezp_name
value_typer   	node_type
node_qtype
scale_type)
r#   r!   new_quantized_namer;  zero_point_namequantized_value_typer   r>  r?  r@  s
   &&&&&&&&&&r$   r2  QuantizedValue.__init__  s2     "($&.	"$$r(   )	r   r?  r>  r9  r:  r;  r@  r=  r<  )NNNNr4  r0   r(   r$   r7  r7    s    % %r(   r7  c                       ] tR tRtRtR tRtR# )BiasToQuantizei  z#
Represents a bias to be quantized
c                	*    Wn         W n        W0n        R # r   	bias_name
input_nameweight_name)r#   rI  rJ  rK  s   &&&&r$   r2  BiasToQuantize.__init__  s    "$&r(   rH  Nr4  r0   r(   r$   rF  rF    s    'r(   rF  c                   V P                   ^ 8X  d   \        RV P                   R24      hV P                   ^8X  d   V P                  pEM5V P                   ^8X  d   V P                  pEMV P                   ^8X  d   V P
                  pMV P                   ^8X  d   V P                  pMV P                   ^8X  d   V P                  pMV P                   ^8X  d   V P                  pMV P                   ^8X  d   V P                  pMV P                   ^8X  d   V P                  pMbV P                   ^	8X  d   V P                  pMDV P                   ^
8X  d   V P                  pM&\        RV P                   RV P                    R24      hV P                  V/# )z
Convert attribute to kwarg format for use with onnx.helper.make_node.
    :parameter attribute: attribute in AttributeProto format.
    :return: attribute in {key: value} format.
z
attribute z does not have type specified.z has unsupported type rJ   )rq   r+   r!   r   r~   srG   gfloatsintsstringstensorsgraphs)	attributer   s   & r$   attribute_to_kwargrV    s;    ~~:inn%55STUU ~~	1		1		1		1		1	  	1		1	!!	1	!!	2	  :inn%55KINNK[[\]^^NNE""r(   c                    V Uu. uF  q"P                   V 8X  g   K  VNK  	  pp\        V4      ^ 8  d
   V^ ,          # R# u upi )z
Helper function to find item by name in a list.
    parameter item_name: name of the item.
    parameter item_list: list of items.
    return: item if found. None otherwise.
N)r!   rz   )	item_name	item_listitemitemss   &&  r$   find_by_namer\    sB     (Bid99	+ATTiEB5zA~58/4/ Cs   ??c                b    Rp\        \        V4      4       F  pW,          V 8X  g   K  TpK  	  V# )z;
Helper function to return index of an item in a node list
rm   )r   rz   )	elem_name	elem_listelem_idxr~   s   &&  r$   get_elem_indexra    s2     H3y>"<9$H # Or(   c                F    \         P                  P                  RW.V4      # )z
Helper function to create a Mul node.
    parameter inputs: list of input names.
    parameter output: output name.
    parameter name: name of the node.
    return: Mul node in NodeProto format.
Mul)r   r   r   )inputsoutputr!   s   &&&r$   get_mul_noderf    s     ;;  $??r(   c               $    V ^8  d   QhRRRRRR/# )r   filenamer   
identifierr   r   r0   )ra   s   "r$   r   r     s&     R R4 RS RT Rr(   c                |    V P                   P                  V P                  V,           V P                  ,           4      # )zh
Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
)parentjoinpathstemsuffix)rh  ri  s   &&r$   generate_identified_filenamero    s+     ??##HMMJ$>$PQQr(   c                `   ^ RI p^ RIHp ^ RIpVP                  ! VP
                  R7       \        R4       \        V 4       \        R4       \        V4       VP                  WRR7       VP                  R4       VP                  R4       VP                  R	4       VP                  4        R# )
r   N)	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotro   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesrt  pltro   s   &&   r$   
apply_plotr    s{    #	S[[1	,	$K	
	*JJtdJ+JJ~JJxII()HHJr(   c           
     	  aaaaa ^ RI o^ RIp^ RIo^ RIHu Hu Hp ^ RIHu Hu Hp ^ RI	H
oHoHo \        P                  ! RV  24        ! VVVVV3R lRSP                  4      pSP!                  WR7      p\#        \$        P&                  P)                  VR4      R4      ;_uu_ 4       pVP+                  V4       RRR4       SP-                  ^ 4      pVP/                  R	4      p	. p
\1        V P3                  4       4       F  pW,          pVP5                  4       p\7        VP9                  R
V4      P;                  4       4      \7        VP9                  RV4      P;                  4       4      .p\=        \?        V4      4      pV	PA                  V4      pV	PA                  V4      pVPC                  V	4       VPE                  V	V4       VPG                  V	V4       VPI                  V	4      pV
PK                  V4       K  	  VPM                  V	\O        V
4      4       V
 F  pV	PQ                  V4       K  	  V	PS                  4       pVPU                  V	4       VPW                  V	V4       VPY                  V	4      pV	P[                  V4       V	P]                  4       p\#        \$        P&                  P)                  VR4      R4      ;_uu_ 4       pVP+                  V4       RRR4       \$        P^                  P9                  RR4      R9   d   VP                  Pa                  V^ 4      pVPc                  4       p\e        V4       F\  pVPg                  V4      p\        P                  ! VPi                  4       4       \        P                  ! VPk                  4       4       K^  	  \#        \$        P&                  P)                  VR4      R4      ;_uu_ 4       p\1        V P3                  4       4       F  pW,          pVP5                  4       p\7        VP9                  R
V4      P;                  4       4      \7        VP9                  RV4      P;                  4       4      .pVR,           \=        \?        V4      4      ,           pVP+                  V4       VP+                  R4       K  	  RRR4       R#   + '       g   i     EL; i  + '       g   i     EL; i  + '       g   i     R# ; i)z6
Helper function to write calibration table to files.
N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                  ,   < ] tR tRtV VVVV3R ltRtR# )*write_calibration_table.<locals>.MyEncoderi#  c                	v  < \        VSS34      '       d   VP                  4       # \        VSP                  4      '       d*   R VP                  4       R\	        VP
                  4      RR/# \        VS4      '       d$   RVP                  P                  R\	        V4      /# SP                  P                  W4      # )r   rj   CLSznumpy.arrayr   )
ru   to_dictrv   tolistr   rj   	__class__r1   JSONEncoderdefault)r#   objr  r  r  jsonnps   &&r$   r  2write_calibration_table.<locals>.MyEncoder.default$  s    #
K899{{}$#rzz**

gs399~um\\#011s}}55wCII##++D66r(   r0   N)r1   r2   r3   r4   r  r8   )r  r  r  r  r  s   r$   	MyEncoderr  #  s    	7 	7r(   r  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG0zcalibration.cache 
)   1)6r  flatbuffersro   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibrater  r  r  logginginfor  dumpsopenospathjoinwritert   Buildersortedkeysr  r   r  rZ  r   r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndrs   TrtTableStartDictVectorrz   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  r  	json_datafiler   builderkey_value_listkeyr   d_valuesrP  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr~   r  r  r  r  r  s   &&                      @@@@@r$   write_calibration_tabler    s   
 LLLL]]LL&'8&9:;7 7D$$ 7 

,
<I	bggll3 23S	9	9T

9 
: 88A;D!!$'GN',,./"'>>#(,,y$/4467(,,x.3356
 CK '',))%0
w'2!!':6((1	i(# 0& $$Wc..AB#	''	2 $!!#I7#Wi0$$W-INN9
..
C	bggll3 9:D	A	AT

3 
B 
zz~~*C0H<%%77Q?	'')xA!q)ILL)LL*+ ! 
bggll3 34c	:	:d+0023C&+F~~'Hhll9d388:;hll8T2779:F #ICK 00EJJuJJt 4 
;	:g 
:	9	9L 
B	A	A 
;	:	:s%   -R/SCS/S 	S	S(	c                   V ^ 8H  P                  \        P                  4      pV ^ 8g  P                  \        P                  4      pVP                  4       pV P                  V,
          pV'       g   R# V\        V4      ,          \        V4      ,          pVR8  g   Q RV RV RV 24       hV P                  \        P                  4      pWqV,          V) V,          ,           ,          pV^ 8*  P                  4       ^ 8X  g   Q hV# )aj  Given a discrete distribution (may have not been normalized to 1),
smooth it by replacing zeros with eps multiplied by a scaling factor
and taking the corresponding amount off the non-zero values.
Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
     https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
Nr   zn_zeros=z, n_nonzeros=z, eps1=)r   ro   rx   sumsizer   )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1r  s   &&      r$   smooth_distributionr  o  s     Qu}}-H6//%--0KllnG'!Jw%
"33D#:Q'-
|74&QQ:88EMM"D(Nte{222DAI??!!!Kr(   c                   V ^8  d   QhRR/# )r   
model_pathr   r0   )ra   s   "r$   r   r     s     n n nr(   c                   \         P                  ! V P                  4       R R7      p\        ;QJ d0    R VP                  P
                   4       F  '       g   K   R# 	  R # ! R VP                  P
                   4       4      # )F)load_external_datac              3  N   "   T F  p\         P                  ! V4      x  K  	  R # 5ir   )r   uses_external_data).0
intializers   & r$   	<genexpr>*model_has_external_data.<locals>.<genexpr>  s!     mUlz#66zBBUls   #%T)r   loadas_posixr   graphr-  )r  models   & r$   model_has_external_datar    s^    IIj))+FE3mUZU`U`UlUlm33m3m3mUZU`U`UlUlmmmr(   c                    V ^8  d   QhRRRR/# )r   r  r   opt_model_pathr0   )ra   s   "r$   r   r     s     k kt kT kr(   c                    \        4       pVP                  4       Vn        \        P                  Vn        / pR.VR&   \        V P                  4       V3RR./VB pR# )z
    Generate model that applies graph optimization (constant folding, etc.)
    parameter model_path: path to the original onnx model
    parameter opt_model_path: path to the optimized onnx model
:return: optimized onnx model
ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  r  sess_optionkwargs_s   &&   r$   optimize_modelr    sb     !"K+9+B+B+DK(+A+R+RK(F%6$7F !,,.jH^G_jcijAr(   c                   V ^8  d   QhRR/# r   r  r	   r0   )ra   s   "r$   r   r     s     7 7J 7r(   c                    RR/pV P                   '       d;   V P                    F*  pVP                  VP                  VP                  /4       K,  	  \        P
                  P                  W4       R# )z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  props   &  r$   add_pre_process_metadatar    sV    .0CDN((D!!488TZZ"89 )KK6r(   c                    V ^8  d   QhRRRR/# r   r  r	   r   r   r0   )ra   s   "r$   r   r     s      *  r(   c                    V P                   '       d;   V P                    F*  pVP                  R8X  g   K  VP                  R8X  g   K)   R# 	  R# )zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r  s   & r$   model_has_pre_process_metadatar    s?    ((Dxx33

FY8Y ) r(   c                   V ^8  d   QhRR/# r  r0   )ra   s   "r$   r   r     s     7 7j 7r(   c                    R R/pV P                   '       d;   V P                    F*  pVP                  VP                  VP                  /4       K,  	  \        P
                  P                  W4       R# )
onnx.inferr  Nr  )r  r  r  s   &  r$   add_infer_metadatar    sV    "$78N%%A!!155!''"23 &KK6r(   c                    V ^8  d   QhRRRR/# r
  r0   )ra   s   "r$   r   r     s      J 4 r(   c                    V P                   '       d;   V P                    F*  pVP                  R 8X  g   K  VP                  R8X  g   K)   R# 	  R# )r  r  TFr  )r  r  s   & r$   model_has_infer_metadatar    s>    %%Auu$4G)G & r(   c                    V ^8  d   QhRRRR/# )r   r  r	   r   r  r0   )ra   s   "r$   r   r     s      Z C r(   c                    V P                    Uu. uF)  qP                  '       d   VP                  R 8X  g   K'  VNK+  	  pp\        V4      ^8w  d   \        R4      hV^ ,          P                  pV# u upi )r   z$Failed to find proper ai.onnx domain)opset_importdomainrz   r+   version)r  opsetai_onnx_domainopset_versions   &   r$   get_opset_versionr    sj    ).););m);<<<SXS_S_clSlee);Nm
>a?@@"1%--M ns
   $A0A0c               $    V ^8  d   QhRRRRRR/# )r   r  r	   weight_typerC   r   r0   )ra   s   "r$   r   r     s!     ! !
 ! !z !r(   c                   \        V 4      pTp\        VR V4      pV^8  d=   V\        P                  P                  8X  d   \
        P                  ! RV R24       ^pMEV^
8X  d   \
        P                  ! RV R24       M#V^
8  d   \
        P                  ! RV R24       ^pW28w  d+   \        P                  P                  W4      p \        V 4      p V # )rY   z$The original model opset version is z, which does not support quantization to float 8. Please update the model to opset >= 19. Automatically update the model to opset 19. Please verify the quantized model.ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Automatically update the model to opset 11. Please verify the quantized model.)
r  getattrr   r
   rT   r  warningversion_converterconvert_version&save_and_reload_model_with_shape_infer)r  r  r  target_opset_versionweight_quant_types   &&   r$   update_opset_versionr(    s    %e,M(]KHr/43C3C3P3PP2=/ B1 1	

  "	"	2=/ BM M	

 
	2=/ B1 1	

  ",&&66uS 7u=Lr(   c                    V ^8  d   QhRRRR/# )r   r  r   r   r	   r0   )ra   s   "r$   r   r     s      D Z r(   c                    \        V R 4      p\        P                  P                  \	        V 4      \	        V4      4       \        P
                  ! VP                  4       4      p\        V4       VP                  4        V# )z	-inferred)	ro  r   shape_inferenceinfer_shapes_pathr   r  r  r  unlink)r  inferred_model_pathr  s   &  r$   load_model_with_shape_inferr/    s`    6z;O**3z?C@S<TUII)2245Eu Lr(   c                    V ^8  d   QhRRRR/# )r   r  r	   r   r0   )ra   s   "r$   r   r     s     7 7* 7 7r(   c                @   \         P                  ! R R7      ;_uu_ 4       p\        P                  ! V 4      p\	        V4      P                  R4      p\        P                  ! W#P                  4       RR7       \        V4      uuRRR4       #   + '       g   i     R# ; i)z
ort.quant.)prefixz
model.onnxT)save_as_external_dataN)
tempfileTemporaryDirectoryr   deepcopyr   rl  r   
save_modelr  r/  )r  quant_tmp_dir
model_copyr  s   &   r$   r%  r%    sg    		$	$L	9	9]]]5)
-(11,?

$7$7$9QUV*:6	 
:	9	9	9s   A!BB	c                    V ^8  d   QhRRRR/# )r   r-  r
   r   r   r0   )ra   s   "r$   r   r     s      { } r(   c                &   V P                   \        P                  P                  \        P                  P                  39   d    \
        P                  P                  V 4      # \        R V P                   R\        V P                   ,           24      h)z&Only float type is supported. Weights z is )r   r   r
   r   r   r   r  to_arrayr+   r!   type_to_name)r-  s   &r$   r   r     su    !7!7!=!=z?U?U?]?] ^^  ))+66

01A1A0B$|T_TiTiGjFkl r(   c                    V ^8  d   QhRRRR/# r   tensor_namer   r   r0   )ra   s   "r$   r   r     s     + +# +# +r(   c                    V R ,           # )_QuantizeLinearr0   r@  s   &r$   add_quant_suffixrD    s    ***r(   c                    V ^8  d   QhRRRR/# r?  r0   )ra   s   "r$   r   r   
  s     , , , ,r(   c                    V \         ,           # r   )QUANT_INPUT_SUFFIXrC  s   &r$   add_quant_input_suffixrH  
  s    +++r(   c                   V ^8  d   QhRR/# r   r   r   r0   )ra   s   "r$   r   r     s     2 2C 2r(   c                    V R ,           # )_QuantizeLinear_Outputr0   rC  s   &r$   add_quant_output_suffixrM    s    111r(   c                   V ^8  d   QhRR/# rJ  r0   )ra   s   "r$   r   r     s     - -s -r(   c                    V R ,           # )_DequantizeLinearr0   rC  s   &r$   add_dequant_suffixrQ    s    ,,,r(   c                   V ^8  d   QhRR/# rJ  r0   )ra   s   "r$   r   r     s     3 3S 3r(   c                    V R ,           # )_DequantizeLinear_Inputr0   rC  s   &r$   add_dequant_input_suffixrU    s    222r(   c                   V ^8  d   QhRR/# rJ  r0   )ra   s   "r$   r   r     s     / /c /r(   c                    V \         ,           # r   )DEQUANT_OUTPUT_SUFFIXrC  s   &r$   add_dequant_output_suffixrY    s    ...r(   ii iiiii rm   )NN)FN)FNNN)FF)rJ   )g-C6?)o
__future__r   r   r  r  r4  enumr   pathlibr   ro   r   r   r   r   r   r	   r
   r   r   r   onnx.helperr   r   r   r   onnx.referencer   onnxruntimer   r   r   onnx.reference.op_runr   ImportError__producer____version__onnx_domain	ms_domainQUANT_OP_NAMErG  DEQUANT_OP_NAMErX  r   MODEL_SIZE_THRESHOLDr   r  ru   r!  r  r=  r   r:   rC   r]   rL   rj   rN   rR   rP   rT   rX   rV   r   rt   rf   re   rh   rg   r  r  r  r   r   r   r   r   r   r  r   r  r   r   r+  r7  rF  rV  r\  ra  rf  ro  r  r  r  r  r  r  r  r  r  r  r(  r/  r%  r   rD  rH  rM  rQ  rU  rY  )ks   0r$   <module>rk     s   #   	      0 0 > > & Q Q - P P7 	 , $2 ' !  474Dq4Dq
SZ[fhiSjloHp*Q'*4Dqt  #> #>L$   V!4  %++g"6  %++g"6!!5;;x#8''  %    5;;q#DekkRU]b]h]hFi"j%++d%**"Eu{{SV^c^h^hGi!j!!EKK$FTYafamamHn#o  5;;vU[[#I5;;W\didodoKp"q  5;;q#>BV[@\"]%++b"=u{{1TX?Y!Z  %++d%**"Eu{{SV^c^h^hGi!j  5;;vU[[#I5;;W\didodoKp"q!    5;;q#DekkRU]b]h]hFi"j%++c"DekkRT\a\f\fFg!h!!EKK$FTYafamamHn#o  5;;vU[[#I5;;W\didodoKp"q  5;;q#>AUZ@["\%++b"=u{{1TX?Y!Z A A 13h<~!H;G|:=zL ^@	< >% %8' '"#J0@R$Yx2n
k 77!H7+,2-3/  $ rs   "Z; [%[;	[[