+
    :i                        ^ RI t^ RIt^ RIt^ RIt^ RIt^ RIHtHtH	t	H
t
HtHt ^ RIHtHt ^ RIHt ^ RIHt ^ RIHtHt ^ RIHtHt ^ RIHt ^ R	IHt ^ R
IHtH t  ^ RI!H"t" ^ RI#H$t$ ^ RI%H&t& ^ RI'H(t(H)t) ^ RI*H
t+ ^ RI,H-t- ^ RI,H.t. ^ RI/H0t0 . ROt1 ! R R]Pd                  4      t3 ! R R]44      t5 ! R R4      t6 ! R R]4      t7 ! R R]4      t8 ! R R]]Pd                  4      t9R# )     N)config	serializesigutilstypestypingutils)Cache	CacheImpl)global_compiler_lock)
Dispatcher)NumbaPerformanceWarningNumbaValueError)Purposetypeof)get_current_device)wrap_arg)compile_cudaCUDACompiler)driver)get_context)cuda_target)missing_launch_config_msgnormalize_kernel_dimensions)r   cuda)_dispatcher)warnc                   8  a a ] tR t^(t oRt]RV 3R ll4       t]R 4       t]R 4       t	R t
]R 4       t]R 4       t]V 3R l4       tR	 tR
 t]R 4       t]R 4       t]R 4       t]R 4       t]R 4       tR tR tR tR tRR ltRR ltRR ltR tRtVtV ;t # )_Kernelzx
CUDA Kernel specialized for a given set of argument types. When called, this
object launches the kernel on the device.
c                d  < V'       d   \        R 4      h\        SV `	  4        RV n        RV n        Wn        W n        W@n        WPn        T;'       g    . V n	        RTRV
'       d   ^M^ /p\        4       P                  p\        V P
                  \        P                  V P                  V P                  VVVVVR7	      pVP                  pV P
                  P                   pVP"                  pVP$                  pVP'                  VP(                  VP*                  WEVVVV	4      w  ppV'       g   . pRVP-                  4       9   V n        V P.                  '       d   RVn        \2         Uu. uF  pRV 2VP-                  4       9   g   K  VNK!  	  ppV'       dr   \4        P6                  P9                  \4        P6                  P;                  \<        4      4      p\4        P6                  P?                  VR	4      pVPA                  V4       V F  pVPC                  V4       K  	  VPD                  V n#        VPH                  V n$        VPJ                  V n&        VV n'        VPP                  V n(        Wn        VP*                  V n        VPR                  V n)        . V n*        . V n+        . V n,        R# u upi )
z,Cannot compile a device function as a kernelFNfastmathoptdebuglineinfoinliner!   nvvm_optionscccudaCGGetIntrinsicHandleT__numba_wrapper_zcpp_function_wrappers.cu)-RuntimeErrorsuper__init__
objectmodeentry_pointpy_funcargtypesr$   r%   
extensionsr   compute_capabilityr   r   voidtarget_context__code__co_filenameco_firstlinenoprepare_cuda_kernellibraryfndescget_asm_strcooperativeneeds_cudadevrtcuda_fp16_math_funcsospathdirnameabspath__file__joinappendadd_linking_filename
entry_name	signaturetype_annotation_type_annotation_codelibrarycall_helperenvironment_referenced_environmentsliftedreload_init)selfr0   r1   linkr$   r%   r&   r!   r2   max_registersr"   devicer'   r(   crestgt_ctxcodefilenamelinenumlibkernelfnresbasedirfunctions_cu_pathfilepath	__class__s   &&&&&&&&&&&&              S/var/www/html/photoedit/myenv/lib/python3.14/site-packages/numba/cuda/dispatcher.pyr-   _Kernel.__init__.   sU   
 MNN     
 $** 1

  !44DLL%**dmm"&**%-#)%-)5!# %%||$$##%%11$,,27<2:G2?AV
 D 69JJ"&C0 B0b%bT*coo.?? r0 B ggoobggooh&?@G "W-G!IKK)*H  *  !++ $ 4 4++ &kk++(*%;Bs   )J-J-c                    V P                   # N)rM   rS   s   &rd   r:   _Kernel.library   s           c                    V P                   # rg   )rL   rh   s   &rd   rK   _Kernel.type_annotation   s    $$$rj   c                    V P                   # rg   )rP   rh   s   &rd   _find_referenced_environments%_Kernel._find_referenced_environments   s    ,,,rj   c                6    V P                   P                  4       # rg   )r5   codegenrh   s   &rd   rq   _Kernel.codegen   s    ""**,,rj   c                @    \        V P                  P                  4      # rg   )tuplerJ   argsrh   s   &rd   argument_types_Kernel.argument_types   s    T^^(())rj   c	                   < V P                  V 4      p	\        W	`  4        RV	n        Wn        W)n        W9n        RV	n        WIn        WYn	        Win
        Wyn        Wn        V	# )
Rebuild an instance.
N)__new__r,   r-   r/   r=   rI   rJ   rL   rM   r$   r%   rN   r2   )clsr=   rH   rJ   codelibraryr$   r%   rN   r2   instancerc   s   &&&&&&&&& rd   _rebuild_Kernel._rebuild   sb     ;;s#c%'#*"&$(! +$*(rj   c                    \        V P                  V P                  V P                  V P                  V P
                  V P                  V P                  V P                  R7      # )z
Reduce the instance for serialization.
Compiled definitions are serialized in PTX form.
Type annotation are discarded.
Thread, block and shared memory configuration are serialized.
Stream information is discarded.
)r=   rH   rJ   r|   r$   r%   rN   r2   )	dictr=   rI   rJ   rM   r$   r%   rN   r2   rh   s   &rd   _reduce_states_Kernel._reduce_states   sL      0 0t"nn$:K:K**t}} $ 0 0T__N 	Nrj   c                :    V P                   P                  4        R# )z'
Force binding to current CUDA context
N)rM   
get_cufuncrh   s   &rd   bind_Kernel.bind   s     	$$&rj   c                ^    V P                   P                  4       P                  P                  # )z>
The number of registers used by each thread for this kernel.
)rM   r   attrsregsrh   s   &rd   regs_per_thread_Kernel.regs_per_thread   s%    
   ++-33888rj   c                ^    V P                   P                  4       P                  P                  # )z4
The amount of constant memory used by this kernel.
)rM   r   r   constrh   s   &rd   const_mem_size_Kernel.const_mem_size   %    
   ++-33999rj   c                ^    V P                   P                  4       P                  P                  # )z=
The amount of shared memory used per block for this kernel.
)rM   r   r   sharedrh   s   &rd   shared_mem_per_block_Kernel.shared_mem_per_block   s%    
   ++-33:::rj   c                ^    V P                   P                  4       P                  P                  # )z*
The maximum allowable threads per block.
)rM   r   r   
maxthreadsrh   s   &rd   max_threads_per_block_Kernel.max_threads_per_block   s%    
   ++-33>>>rj   c                ^    V P                   P                  4       P                  P                  # )z=
The amount of local memory used per thread for this kernel.
)rM   r   r   localrh   s   &rd   local_mem_per_thread_Kernel.local_mem_per_thread   r   rj   c                6    V P                   P                  4       # )z&
Returns the LLVM IR for this kernel.
)rM   get_llvm_strrh   s   &rd   inspect_llvm_Kernel.inspect_llvm   s       --//rj   c                :    V P                   P                  VR7      # )z'
Returns the PTX code for this kernel.
)r(   )rM   r<   )rS   r(   s   &&rd   inspect_asm_Kernel.inspect_asm   s       ,,,33rj   c                6    V P                   P                  4       # )z^
Returns the CFG of the SASS for this kernel.

Requires nvdisasm to be available on the PATH.
)rM   get_sass_cfgrh   s   &rd   inspect_sass_cfg_Kernel.inspect_sass_cfg   s       --//rj   c                6    V P                   P                  4       # )zX
Returns the SASS code for this kernel.

Requires nvdisasm to be available on the PATH.
)rM   get_sassrh   s   &rd   inspect_sass_Kernel.inspect_sass   s       ))++rj   c                   V P                   f   \        R4      hVf   \        P                  p\	        V P
                  : RV P                  : 2VR7       \	        RVR7       \	        V P                   VR7       \	        RVR7       R# )
Produce a dump of the Python source of this function annotated with the
corresponding Numba IR and type information. The dump is written to
*file*, or *sys.stdout* if *file* is *None*.
Nz Type annotation is not available filezP--------------------------------------------------------------------------------zP================================================================================)rL   
ValueErrorsysstdoutprintrI   rv   )rS   r   s   &&rd   inspect_types_Kernel.inspect_types  sg       (?@@<::D$*=*=>TJhT"d##$/hT"rj   c                   \        4       pV P                  P                  4       p\        V\        4      '       d   \
        P                  ! R V4      pVP                  VVV4      pVP                  P                  pWV,          # )a  
Calculates the maximum number of blocks that can be launched for this
kernel in a cooperative grid in the current context, for the given block
and dynamic shared memory sizes.

:param blockdim: Block dimensions, either as a scalar for a 1D block, or
                 a tuple for 2D or 3D blocks.
:param dynsmemsize: Dynamic shared memory size in bytes.
:return: The maximum number of blocks in the grid.
c                     W,          # rg    )xys   &&rd   <lambda>5_Kernel.max_cooperative_grid_blocks.<locals>.<lambda>&  s    QUrj   )
r   rM   r   
isinstancert   	functoolsreduce$get_active_blocks_per_multiprocessorrV   MULTIPROCESSOR_COUNT)rS   blockdimdynsmemsizectxcufuncactive_per_smsm_counts   &&&    rd   max_cooperative_grid_blocks#_Kernel.max_cooperative_grid_blocks  sr     m""--/h&& ''(:HEH@@AIALN ::22''rj   c                  a V P                   P                  4       oV P                  '       d   SP                  R ,           pSP                  P                  V4      w  rxV\        P                  ! \        P                  4      8X  g   Q h\        P                  ! 4       p	VP                  ^ VR7       . p
. p\        V P                  V4       F  w  rV P                  WWJV4       K  	  \        P                  '       d!   \        P                  P!                  ^ 4      pMRpT;'       d    VP"                  ;'       g    Tp\        P$                  ! SP"                  .VOVOVNVNVN5RV P&                  /  V P                  '       Ed$   \        P(                  ! \        P*                  ! X	4      XX4       V	P,                  ^ 8w  d   V3R lpR Uu. uF  pV! RV,           4      NK  	  ppR Uu. uF  pV! RV,           4      NK  	  ppV	P,                  pV P.                  P1                  V4      w  pppVf   RpM4Vw  ppp\2        P4                  P7                  V4      pR	V: R
V: RV: R2pV: RV: RV: 2pV'       d!   V: RV^ ,          : 23VR,          ,           pMV3pV! V!  hV
 F
  pV! 4        K  	  R# u upi u upi )__errcode__)streamNr=   c                    < SP                   P                  SP                  : R V : R 24      w  r\        P                  ! 4       p\
        P                  ! \        P                  ! V4      W4       VP                  # )__)	moduleget_global_symbolrH   ctypesc_intr   device_to_host	addressofvalue)rH   memszvalr   s   &   rd   load_symbol#_Kernel.launch.<locals>.load_symbolS  s\    $mm==?E{{?C?E FGC !,,.C))&*:*:3*?I99$rj   zyxtidctaid zIn function z, file z, line z, ztid=z ctaid=z: :   NN)rM   r   r$   rH   r   r   r   sizeofr   memsetziprv   _prepare_argsr   USE_NV_BINDINGbindingCUstreamhandlelaunch_kernelr=   r   r   r   rN   get_exceptionr@   rA   rC   )rS   ru   griddimr   r   	sharedmemexcnameexcmemexcszexcvalretr
kernelargstvzero_streamstream_handler   ir   r   rY   excclsexc_argsloclocinfosymrb   linenoprefixwbr   s   &&&&&&                        @rd   launch_Kernel.launch-  st   ""--/:::kkM1G"MM;;GDMFFMM&,,7777\\^FMM!FM+ 
++T2DAqV:> 3     ..11!4KK006==??K 	V]] 	;%	;&	; '	; +		;
 (	; *.)9)9	; :::!!&"2"26":FEJ||q % 8==u!{519-u=;@A5aWq[15A||(,(8(8(F(Ft(L%#; G,/)C6!wwx8HFIFNFLOG 18eD,2HQK @B  %H  &wHh'' BD / >As   J=-Kc                   \        V P                  4       F  pVP                  VVVVR7      w  rK  	  \        V\        P
                  4      '       Ed   \        V4      P                  WC4      p\        P                  p\        P                  ! ^ 4      p	\        P                  ! ^ 4      p
V! VP                  4      pV! VP                  P                  4      p\        P                  ! V4      p\        P                   '       d   \#        V4      p\        P                  ! V4      pVP%                  V	4       VP%                  V
4       VP%                  V4       VP%                  V4       VP%                  V4       \'        VP(                  4       F+  pVP%                  V! VP*                  V,          4      4       K-  	  \'        VP(                  4       F+  pVP%                  V! VP,                  V,          4      4       K-  	  R# \        V\        P.                  4      '       d1   \1        \        RV,          4      ! V4      pVP%                  V4       R# V\        P2                  8X  d[   \        P4                  ! \6        P2                  ! V4      P9                  \6        P:                  4      4      pVP%                  V4       R# V\        P<                  8X  d*   \        P>                  ! V4      pVP%                  V4       R# V\        P@                  8X  d*   \        PB                  ! V4      pVP%                  V4       R# V\        PD                  8X  d3   \        PF                  ! \#        V4      4      pVP%                  V4       R# V\        PH                  8X  da   VP%                  \        PB                  ! VPJ                  4      4       VP%                  \        PB                  ! VPL                  4      4       R# V\        PN                  8X  da   VP%                  \        P>                  ! VPJ                  4      4       VP%                  \        P>                  ! VPL                  4      4       R# \        V\        PP                  \        PR                  34      '       dE   VP%                  \        PT                  ! VP9                  \6        PV                  4      4      4       R# \        V\        PX                  4      '       do   \        V4      P                  WC4      pVPZ                  p\        P                   '       d    \        P                  ! \#        V4      4      pVP%                  V4       R# \        V\        P\                  4      '       dG   \_        V4      \_        V4      8X  g   Q h\a        W4       F  w  ppV Pc                  VVW4V4       K  	  R# \        V\        Pd                  4      '       d,    V Pc                  VP                  VPf                  W4V4       R# \i        W4      h  \h         d    \i        Y4      hi ; i)z6
Convert arguments to ctypes and append to kernelargs
)r   r   zc_%sN)5reversedr2   prepare_argsr   r   Arrayr   	to_devicer   	c_ssize_tc_void_psizedtypeitemsizer   device_pointerr   intrF   rangendimshapestridesIntegergetattrfloat16c_uint16npviewuint16float64c_doublefloat32c_floatbooleanc_uint8	complex64realimag
complex128
NPDatetimeNPTimedeltac_int64int64Recorddevice_ctypes_pointer	BaseTuplelenr   r   
EnumMemberr   NotImplementedError)rS   tyr   r   r   r   	extensiondevaryc_intpmeminfoparentnitemsr  ptrdataaxcvaldevrecr   r   s   &&&&&&              rd   r   _Kernel._prepare_argsu  sG    "$//2I,,	 - GB 3 b%++&&c],,T:F%%Fooa(G__Q'FFKK(Ffll334H''/C$$$#h??3'Dg&f%f%h'd#FKK(!!&b)9":; )FKK(!!&);"<= ) EMM**66B;/4Dd#5== ??2::c?#7#7		#BCDd#5== ??3'Dd#5== >>#&Dd#5== >>#c(+Dd#5??"fnnSXX67fnnSXX675###foochh78foochh78U--u/@/@ABBfnnSXXbhh-?@AELL))c],,T:F..C$$$ooc#h/c"EOO,,r7c#h&&&B1""1azB % E,,--3""HHciiz &b..	 ' 3)"223s   :(X/ /Y)rM   rP   rL   r1   rN   r=   r$   rI   r/   rO   r2   r;   rQ   r%   r.   r0   rR   rJ   r5   )	NFFFFNNTFrg   )r   r   r   )!__name__
__module____qualname____firstlineno____doc__r   r-   propertyr:   rK   rn   rq   rv   classmethodr~   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   __static_attributes____classdictcell____classcell__rc   __classdict__s   @@rd   r   r   (   s*    
 Z Zx ! ! % %- - - * *  *N' 9 9 : : ; ; ? ? : :040,#"(,FP\/ \/rj   r   c                   2   a  ] tR tRt o R tR tR tRtV tR# )ForAlli  c                t    V^ 8  d   \        RV,          4      hWn        W n        W0n        W@n        WPn        R# )r   z0Can't create ForAll with negative task count: %sN)r   
dispatcherntasksthread_per_blockr   r   )rS   rK  rL  tpbr   r   s   &&&&&&rd   r-   ForAll.__init__  s;    A:O%& ' '$ #"rj   c                X   V P                   ^ 8X  d   R# V P                  P                  '       d   V P                  pMV P                  P                  ! V!  pV P	                  V4      pV P                   V,           ^,
          V,          pW$W0P
                  V P                  3,          ! V!  # )r   N)rL  rK  specialized
specialize_compute_thread_per_blockr   r   )rS   ru   rQ  r   r   s   &*   rd   __call__ForAll.__call__  s    ;;!??&&&//K//44d;K11+>;;)A-(:Hkk>>* +,02 	2rj   c                &   V P                   pV^ 8w  d   V# \        4       p\        \        VP                  P                  4       4      4      p\        VP                  P                  4       ^ V P                  RR7      pVP                  ! R/ VB w  rbV# )r   i   )funcb2d_funcmemsizeblocksizelimitr   )rM  r   nextiter	overloadsvaluesr   rM   r   r   get_max_potential_block_size)rS   rK  rN  r   r]   kwargs_s   &&     rd   rS   ForAll._compute_thread_per_block  s    ##!8J -C $z33::<=>F((335#	F 55??FAJrj   )rK  rL  r   r   rM  N)	r<  r=  r>  r?  r-   rT  rS  rC  rD  rG  s   @rd   rI  rI    s     #2 rj   rI  c                   ,   a  ] tR tRt o R tR tRtV tR# )_LaunchConfigurationi  c                   Wn         W n        W0n        W@n        WPn        \
        P                  '       dL   ^pV^ ,          V^,          ,          V^,          ,          pWv8  d   RV R2p\        \        V4      4       R# R# R# )   z
Grid size zB will likely result in GPU under-utilization due to low occupancy.N)	rK  r   r   r   r   r   CUDA_LOW_OCCUPANCY_WARNINGSr   r   )	rS   rK  r   r   r   r   min_grid_size	grid_sizemsgs	   &&&&&&   rd   r-   _LaunchConfiguration.__init__  s|    $ "---  M
WQZ/'!*<I(#I; /A A,S12 ) .rj   c                    V P                   P                  WP                  V P                  V P                  V P
                  4      # rg   )rK  callr   r   r   r   rS   ru   s   &*rd   rT  _LaunchConfiguration.__call__  s4    ##D,,$(KKA 	Arj   )r   rK  r   r   r   N)r<  r=  r>  r?  r-   rT  rC  rD  rc  s   @rd   re  re    s     3.A Arj   re  c                   2   a  ] tR tRt o R tR tR tRtV tR# )CUDACacheImpli  c                "    VP                  4       # rg   )r   )rS   r]   s   &&rd   r   CUDACacheImpl.reduce   s    $$&&rj   c                .    \         P                  ! R/ VB # )Nr   )r   r~   )rS   r5   payloads   &&&rd   rebuildCUDACacheImpl.rebuild#  s    *'**rj   c                    R # )Tr   )rS   rW   s   &&rd   check_cachableCUDACacheImpl.check_cachable&  s     rj   r   N)	r<  r=  r>  r?  r   rw  rz  rC  rD  rc  s   @rd   rr  rr    s     '+ rj   rr  c                   :   a a ] tR tRt oRt]tV 3R ltRtVt	V ;t
# )	CUDACachei1  zK
Implements a cache that saves and loads CUDA kernels and compile results.
c                   < ^ RI Hp V! R4      ;_uu_ 4        \        SV `  W4      uuRRR4       #   + '       g   i     R# ; i)r   )target_overrider   N)numba.core.target_extensionr  r,   load_overload)rS   sigr5   r  rc   s   &&& rd   r  CUDACache.load_overload7  s0     	@V$$7(= %$$$s	   2A	r   )r<  r=  r>  r?  r@  rr  _impl_classr  rC  rD  rE  rF  s   @@rd   r}  r}  1  s       K> >rj   r}  c                   t  a a ] tR tRt oRtRt]t]3V 3R llt	]
R 4       tR t]P                  ! ^R7      R#R l4       tR	 tR$R
 lt]
R 4       tR tR tR tR tR t]
R 4       tR%R ltR%R ltR%R ltR%R ltR%R ltR tR%R ltR t R t!R%R lt"R%R lt#R%R lt$R%R lt%R%R lt&]'R  4       t(R! t)R"t*Vt+V ;t,# )&CUDADispatcheri@  az  
CUDA Dispatcher object. When configured and called, the dispatcher will
specialize itself for the given arguments (if no suitable specialized
version already exists) & compute capability, and launch on the device
associated with the current context.

Dispatcher objects are not to be constructed by the user, but instead are
created using the :func:`numba.cuda.jit` decorator.
Fc                F   < \         SV `  WVR 7       RV n        / V n        R# ))targetoptionspipeline_classFN)r,   r-   _specializedspecializations)rS   r0   r  r  rc   s   &&&&rd   r-   CUDADispatcher.__init__R  s.    (6 	 	8 "  "rj   c                .    \         P                  ! V 4      # rg   )
cuda_typesr  rh   s   &rd   _numba_type_CUDADispatcher._numba_type_b  s    ((..rj   c                :    \        V P                  4      V n        R # rg   )r}  r0   _cacherh   s   &rd   enable_cachingCUDADispatcher.enable_cachingf  s    -rj   )maxsizec                6    \        W4      w  r\        WW#V4      # rg   )r   re  )rS   r   r   r   r   s   &&&&&rd   	configureCUDADispatcher.configurei  s    7J#D8YOOrj   c                X    \        V4      R9  d   \        R4      hV P                  ! V!  # )   z.must specify at least the griddim and blockdim)r        )r+  r   r  ro  s   &&rd   __getitem__CUDADispatcher.__getitem__n  s)    t9I%MNN~~t$$rj   c                    \        WW#VR7      # )a  Returns a 1D-configured dispatcher for a given number of tasks.

This assumes that:

- the kernel maps the Global Thread ID ``cuda.grid(1)`` to tasks on a
  1-1 basis.
- the kernel checks that the Global Thread ID is upper-bounded by
  ``ntasks``, and does nothing if it is not.

:param ntasks: The number of tasks.
:param tpb: The size of a block. An appropriate value is chosen if this
            parameter is not supplied.
:param stream: The stream on which the configured dispatcher will be
               launched.
:param sharedmem: The number of bytes of dynamic shared memory required
                  by the kernel.
:return: A configured dispatcher, ready to launch on a set of
         arguments.)rN  r   r   )rI  )rS   rL  rN  r   r   s   &&&&&rd   forallCUDADispatcher.foralls  s    ( diPPrj   c                8    V P                   P                  R4      # )a  
A list of objects that must have a `prepare_args` function. When a
specialized kernel is called, each argument will be passed through
to the `prepare_args` (from the last object in this list to the
first). The arguments to `prepare_args` are:

- `ty` the numba type of the argument
- `val` the argument value itself
- `stream` the CUDA stream used for the current call to the kernel
- `retr` a list of zero-arg functions that you may want to append
  post-call cleanup work to.

The `prepare_args` function must return a tuple `(ty, val)`, which
will be passed in turn to the next right-most `extension`. After all
the extensions have been called, the resulting `(ty, val)` will be
passed into Numba's default argument marshalling logic.
r2   )r  getrh   s   &rd   r2   CUDADispatcher.extensions  s    & !!%%l33rj   c                     \        \        4      hrg   )r   r   )rS   ru   r`  s   &*,rd   rT  CUDADispatcher.__call__  s    233rj   c                    V P                   '       d.   \        \        V P                  P	                  4       4      4      pM"\
        P                  P                  ! V .VO5!  pVP                  WW4V4       R# )z:
Compile if necessary and invoke this kernel with *args*.
N)	rQ  r[  r\  r]  r^  r   r   
_cuda_callr  )rS   ru   r   r   r   r   r]   s   &&&&&& rd   rn  CUDADispatcher.call  sW     $t~~44678F ++66tCdCFdXyArj   c                    V'       d   Q hV Uu. uF  q0P                  V4      NK  	  ppV P                  \        V4      4      # u upi rg   )typeof_pyvalcompilert   )rS   ru   kwsar1   s   &*,  rd   _compile_for_args CUDADispatcher._compile_for_args  s>    w267$Q%%a($7||E(O,, 8s   Ac                     \        V\        P                  4      #   \        \        3 dQ    \
        P                  ! T4      '       d3   \        \
        P                  ! TR R7      \        P                  4      u # h i ; i)F)sync)r   r   argumentr   r   r   is_cuda_arrayas_cuda_array)rS   r   s   &&rd   r  CUDADispatcher.typeof_pyval  sk    		#w//00, 	!!#&& d005A%..0 0 	s    ,A=	0A=;A=c                  a  S P                   '       d   \        R4      h\        4       P                  p\        ;QJ d    . V 3R lV 4       F  NK  	  5M! V 3R lV 4       4      pS P
                  P                  W#34      pV'       d   V# S P                  p\        S P                  VR7      pVP                  V4       VP                  4        RVn        VS P
                  W#3&   V# )zL
Create a new instance of this dispatcher specialized for the given
*args*.
zDispatcher already specializedc              3   F   <"   T F  pSP                  V4      x  K  	  R # 5irg   )r  ).0r  rS   s   & rd   	<genexpr>,CUDADispatcher.specialize.<locals>.<genexpr>  s     <t!**1--ts   !)r  T)rQ  r+   r   r3   rt   r  r  r  r  r0   r  disable_compiler  )rS   ru   r(   r1   specializationr  s   f*    rd   rR  CUDADispatcher.specialize  s    
 ?@@!445<t<55<t<<--112.A!!**'6CEx(&&(&*#-;R\*rj   c                    V P                   # )z.
True if the Dispatcher has been specialized.
)r  rh   s   &rd   rQ  CUDADispatcher.specialized  s    
    rj   c                d   Ve(   V P                   VP                  ,          P                  # V P                  '       d7   \	        \        V P                   P                  4       4      4      P                  # V P                   P                  4        UUu/ uF  w  r#W#P                  bK  	  upp# u uppi )a  
Returns the number of registers used by each thread in this kernel for
the device in the current context.

:param signature: The signature of the compiled kernel to get register
                  usage for. This may be omitted for a specialized
                  kernel.
:return: The number of registers used by the compiled variant of the
         kernel for the given signature and current device.
)r]  ru   r   rQ  r[  r\  r^  itemsrS   rJ   r  overloads   &&  rd   get_regs_per_thread"CUDADispatcher.get_regs_per_thread  s      >>)..1AAAT^^22456FFF *.)=)=)?A)? 111)?A A A   B,c                d   Ve(   V P                   VP                  ,          P                  # V P                  '       d7   \	        \        V P                   P                  4       4      4      P                  # V P                   P                  4        UUu/ uF  w  r#W#P                  bK  	  upp# u uppi )a  
Returns the size in bytes of constant memory used by this kernel for
the device in the current context.

:param signature: The signature of the compiled kernel to get constant
                  memory usage for. This may be omitted for a
                  specialized kernel.
:return: The size in bytes of constant memory allocated by the
         compiled variant of the kernel for the given signature and
         current device.
)r]  ru   r   rQ  r[  r\  r^  r  r  s   &&  rd   get_const_mem_size!CUDADispatcher.get_const_mem_size  s      >>)..1@@@T^^22456EEE *.)=)=)?A)? 000)?A A Ar  c                d   Ve(   V P                   VP                  ,          P                  # V P                  '       d7   \	        \        V P                   P                  4       4      4      P                  # V P                   P                  4        UUu/ uF  w  r#W#P                  bK  	  upp# u uppi )a  
Returns the size in bytes of statically allocated shared memory
for this kernel.

:param signature: The signature of the compiled kernel to get shared
                  memory usage for. This may be omitted for a
                  specialized kernel.
:return: The amount of shared memory allocated by the compiled variant
         of the kernel for the given signature and current device.
)r]  ru   r   rQ  r[  r\  r^  r  r  s   &&  rd   get_shared_mem_per_block'CUDADispatcher.get_shared_mem_per_block        >>)..1FFFT^^22456KKK *.)=)=)?A)? 666)?A A Ar  c                d   Ve(   V P                   VP                  ,          P                  # V P                  '       d7   \	        \        V P                   P                  4       4      4      P                  # V P                   P                  4        UUu/ uF  w  r#W#P                  bK  	  upp# u uppi )a  
Returns the maximum allowable number of threads per block
for this kernel. Exceeding this threshold will result in
the kernel failing to launch.

:param signature: The signature of the compiled kernel to get the max
                  threads per block for. This may be omitted for a
                  specialized kernel.
:return: The maximum allowable threads per block for the compiled
         variant of the kernel for the given signature and current
         device.
)r]  ru   r   rQ  r[  r\  r^  r  r  s   &&  rd   get_max_threads_per_block(CUDADispatcher.get_max_threads_per_block  s      >>)..1GGGT^^22456LLL *.)=)=)?A)? 777)?A A Ar  c                d   Ve(   V P                   VP                  ,          P                  # V P                  '       d7   \	        \        V P                   P                  4       4      4      P                  # V P                   P                  4        UUu/ uF  w  r#W#P                  bK  	  upp# u uppi )ay  
Returns the size in bytes of local memory per thread
for this kernel.

:param signature: The signature of the compiled kernel to get local
                  memory usage for. This may be omitted for a
                  specialized kernel.
:return: The amount of local memory allocated by the compiled variant
         of the kernel for the given signature and current device.
)r]  ru   r   rQ  r[  r\  r^  r  r  s   &&  rd   get_local_mem_per_thread'CUDADispatcher.get_local_mem_per_thread/  r  r  c                4   V P                   '       d   V P                  \        V4      4       V P                  P                  pRP                  V4      p\        P                  ! WCV P                  R7      p\        P                  ! V P                  4      pWVW3# )z
Get a typing.ConcreteTemplate for this dispatcher and the given
*args* and *kws* types.  This allows resolution of the return type.

A (template, pysig, args, kws) tuple is returned.
zCallTemplate({0}))key
signatures)_can_compilecompile_devicert   r0   r<  formatr   make_concrete_templatenopython_signaturesr   pysignature)rS   ru   r  	func_namerH   call_templatepysigs   &&&    rd   get_call_template CUDADispatcher.get_call_templateB  s~     d, LL))	")))455D,D,DF!!$,,/T..rj   c                   WP                   9  Ed0   V P                  ;_uu_ 4        V P                  P                  R4      pV P                  P                  R4      pV P                  P                  R4      pV P                  P                  R4      pRV P                  P                  R4      '       d   ^M^ RV/p\	        4       P
                  p\        V P                  W!VVVVVVR7	      p	WP                   V&   V	P                  P                  V	P                  V	P                  V	P                  .4       RRR4       V	# V P                   V,          p	V	#   + '       g   i     X	# ; i)zCompile the device function for the given argument types.

Each signature is compiled once by caching the compiled function inside
this object.

Returns the `CompileResult`.
r$   r%   r&   r!   r"   r#   N)r]  _compiling_counterr  r  r   r3   r   r0   r5   insert_user_functionr/   r;   r:   )
rS   ru   return_typer$   r%   r&   r!   r'   r(   rW   s
   &&&       rd   r  CUDADispatcher.compile_device]  s=    ~~%(((**..w7--11*=++//9--11*=  2 2 6 6u = =11 
 ()<<#DLL+*/-5+1-51=')+ (,t$##889I9I9=:>,,I- )8  >>$'D9 )(8 s   DEE'	c                    V Uu. uF  q3P                   NK  	  ppV P                  WAR R7       WP                  V&   R# u upi )Tr   N)_code_insertr]  )rS   r]   r1   r  c_sigs   &&&  rd   add_overloadCUDADispatcher.add_overload  s9    "*+(Q(+U.#)x  ,s   >c                   \         P                  ! V4      w  r#Ve   V\        P                  8X  g   Q hV P                  '       d-   \        \        V P                  P                  4       4      4      # V P                  P                  V4      pVe   V# V P                  P                  WP                  4      pVe!   V P                  V;;,          ^,          uu&   MV P                  V;;,          ^,          uu&   V P                  '       g   \!        R4      h\#        V P$                  V3/ V P&                  B pVP)                  4        V P                  P+                  W4       V P-                  WB4       V# )zg
Compile and bind to the current context a version of this kernel
specialized for the given signature.
zCompilation disabled)r   normalize_signaturer   nonerQ  r[  r\  r]  r^  r  r  r  	targetctx_cache_hits_cache_missesr  r+   r   r0   r  r   save_overloadr  )rS   r  r1   r  r]   s   &&   rd   r  CUDADispatcher.compile  s'   
 !) < <S A"kUZZ&??? T^^224566^^''1F! **3?S!Q&! s#q(#$$$"#9::T\\8Jt7I7IJFKKMKK%%c2&+rj   c                   V P                   P                  R4      pVeU   V'       d,   V P                  V,          P                  P	                  4       # V P                  V,          P                  4       # V'       dG   V P                  P                  4        UUu/ uF  w  r4W4P                  P	                  4       bK!  	  upp# V P                  P                  4        UUu/ uF  w  r4W4P                  4       bK  	  upp# u uppi u uppi )z
Return the LLVM IR for this kernel.

:param signature: A tuple of argument types.
:return: The LLVM IR for the given signature, or a dict of LLVM IR
         for all previously-encountered signatures.

rV   )r  r  r]  r:   r   r   r  )rS   rJ   rV   r  r  s   &&   rd   r   CUDADispatcher.inspect_llvm  s     ##''1 ~~i088EEGG~~i0==??-1^^-A-A-CE-CMC --::<<-CE E .2^^-A-A-CE-CMC 2244-CE EEEs   %C>Dc                D   \        4       P                  pV P                  P                  R4      pVeW   V'       d-   V P                  V,          P
                  P                  V4      # V P                  V,          P                  V4      # V'       dH   V P                  P                  4        UUu/ uF   w  rEWEP
                  P                  V4      bK"  	  upp# V P                  P                  4        UUu/ uF  w  rEWEP                  V4      bK  	  upp# u uppi u uppi )z
Return this kernel's PTX assembly code for for the device in the
current context.

:param signature: A tuple of argument types.
:return: The PTX code for the given signature, or a dict of PTX codes
         for all previously-encountered signatures.
rV   )	r   r3   r  r  r]  r:   r<   r   r  )rS   rJ   r(   rV   r  r  s   &&    rd   r   CUDADispatcher.inspect_asm  s     !44##''1 ~~i088DDRHH~~i0<<R@@-1^^-A-A-CE-CMC --99"==-CE E .2^^-A-A-CE-CMC 11"55-CE EEEs   /&D6Dc                (   V P                   P                  R4      '       d   \        R4      hVe"   V P                  V,          P	                  4       # V P                  P                  4        UUu/ uF  w  r#W#P	                  4       bK  	  upp# u uppi )aK  
Return this kernel's CFG for the device in the current context.

:param signature: A tuple of argument types.
:return: The CFG for the given signature, or a dict of CFGs
         for all previously-encountered signatures.

The CFG for the device in the current context is returned.

Requires nvdisasm to be available on the PATH.
rV   z'Cannot get the CFG of a device function)r  r  r+   r]  r   r  rS   rJ   r  defns   &&  rd   r   CUDADispatcher.inspect_sass_cfg  s     !!(++HII >>),==?? &*^^%9%9%;=%;	 ..00%;= = =   /Bc                (   V P                   P                  R4      '       d   \        R4      hVe"   V P                  V,          P	                  4       # V P                  P                  4        UUu/ uF  w  r#W#P	                  4       bK  	  upp# u uppi )ag  
Return this kernel's SASS assembly code for for the device in the
current context.

:param signature: A tuple of argument types.
:return: The SASS code for the given signature, or a dict of SASS codes
         for all previously-encountered signatures.

SASS for the device in the current context is returned.

Requires nvdisasm to be available on the PATH.
rV   z(Cannot inspect SASS of a device function)r  r  r+   r]  r   r  r  s   &&  rd   r   CUDADispatcher.inspect_sass  s     !!(++IJJ >>),99;; &*^^%9%9%;=%;	 **,,%;= = =r  c                    Vf   \         P                  pV P                  P                  4        F  w  r#VP	                  VR7       K  	  R# )r   Nr   )r   r   r]  r  r   )rS   r   ra  r  s   &&  rd   r   CUDADispatcher.inspect_types  s<     <::D~~++-GAD) .rj   c                    V ! W4      pV# )ry   r   )r{   r0   r  r}   s   &&& rd   r~   CUDADispatcher._rebuild  s    
 w.rj   c                D    \        V P                  V P                  R7      # )zL
Reduce the instance for serialization.
Compiled definitions are discarded.
)r0   r  )r   r0   r  rh   s   &rd   r   CUDADispatcher._reduce_states  s     
 DLL"&"4"46 	6rj   )r  r  r  r;  )r   r   r   rg   )-r<  r=  r>  r?  r@  
_fold_argsr   targetdescrr   r-   rA  r  r  r   	lru_cacher  r  r  r2   rT  rn  r  r  rR  rQ  r  r  r  r  r  r  r  r  r  r   r   r   r   r   rB  r~   r   rC  rD  rE  rF  s   @@rd   r  r  @  s     JK>J "  / /. %P &P%
Q, 4 4(4	B-0 ! !A&A(A&A*A&/6%N*
"HE.E0=*=,
*  6 6rj   r  )hsinhcoshloghlog10hlog2hexphexp10hexp2hsqrthrsqrthfloorhceilhrcphrinthtrunchdiv):numpyr  r@   r   r   r   
numba.corer   r   r   r   r   r   numba.core.cachingr	   r
   numba.core.compiler_lockr   numba.core.dispatcherr   numba.core.errorsr   r   numba.core.typing.typeofr   r   numba.cuda.apir   numba.cuda.argsr   numba.cuda.compilerr   r   numba.cuda.cudadrvr   numba.cuda.cudadrv.devicesr   numba.cuda.descriptorr   numba.cuda.errorsr   r   
numba.cudar  numbar   r   warningsr   r?   ReduceMixinr   objectrI  re  rr  r}  r  r   rj   rd   <module>r-     s     	 
   H H / 9 , F 4 - $ : % 2 -< *   * i/i## i/X+V +\A A:I $> >a6Z!6!6 a6rj   