+
    :i_                     ,   ^ RI Ht ^ RIHtHt ^ RIHt ^ RIHtH	t	 ^ RI
Ht ^ RIHt ^ RIHt ^ RIHt R	 t]R
 4       t]R 4       t]R 4       t]! ]P,                  ! ]4      RRR7      R 4       t]R 4       tR t]R 4       t]R 4       t]R 4       tR# )    )ir)cudatypes)cgutils)RequireLiteralValueNumbaValueError)	signature)overload_attribute)	nvvmutils)	intrinsicc                     V P                   pV^8X  d   \        P                  pM8VR9   d'   \        P                  ! \        P                  V4      pM\	        R4      h\        V\        P                  4      # )   zargument can only be 1, 2, 3)      )literal_valuer   int64UniTupler   r	   int32)ndimvalrestypes   &  S/var/www/html/photoedit/myenv/lib/python3.14/site-packages/numba/cuda/intrinsics.py_type_grid_functionr      sU    


C
ax++	..c2<==Wekk**    c                z    \        V\        P                  4      '       g   \        V4      h\	        V4      pR pW#3# )a  grid(ndim)

Return the absolute position of the current thread in the entire grid of
blocks.  *ndim* should correspond to the number of dimensions declared when
instantiating the kernel. If *ndim* is 1, a single integer is returned.
If *ndim* is 2 or 3, a tuple of the given number of integers is returned.

Computation of the first integer is as follows::

    cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x

and is similar for the other two indices, but using the ``y`` and ``z``
attributes.
c                 &   VP                   pV\        P                  8X  d   \        P                  ! V^R7      # \        V\        P                  4      '       d8   \        P                  ! WP                  R7      p\        P                  ! W4      # R# )r   )dimN)
return_typer   r   r   get_global_id
isinstancer   countr   
pack_array)contextbuildersigargsr   idss   &&&&  r   codegengrid.<locals>.codegen1   se    //ekk!**7::00))'}}EC%%g33 1r   r    r   IntegerLiteralr   r   )	typingctxr   r%   r(   s   &&  r   gridr-      s;    " dE0011!$''
d
#C4 <r   c                   a \        V\        P                  4      '       g   \        V4      h\	        V4      pR oV3R lpW#3# )a  gridsize(ndim)

Return the absolute size (or shape) in threads of the entire grid of
blocks. *ndim* should correspond to the number of dimensions declared when
instantiating the kernel. If *ndim* is 1, a single integer is returned.
If *ndim* is 2 or 3, a tuple of the given number of integers is returned.

Computation of the first integer is as follows::

    cuda.blockDim.x * cuda.gridDim.x

and is similar for the other two indices, but using the ``y`` and ``z``
attributes.
c                     \         P                  ! ^@4      p\        P                  ! V RV 24      p\        P                  ! V RV 24      pV P	                  V P                  W24      V P                  WB4      4      # )@   zntid.znctaid.)r   IntTyper   	call_sregmulsext)r$   r   i64ntidnctaids   &&   r   _nthreads_for_dim#gridsize.<locals>._nthreads_for_dimR   sb    jjn""7eC5M:$$Wuo>{{7<<2GLL4MNNr   c                 l  < VP                   pS! VR 4      pV\        P                  8X  d   V# \        V\        P                  4      '       dg   S! VR4      pVP
                  ^8X  d   \        P                  ! WV34      # VP
                  ^8X  d"   S! VR4      p\        P                  ! WWg34      # R# R# )xyzN)r   r   r   r    r   r!   r   r"   )	r#   r$   r%   r&   r   nxnynzr8   s	   &&&&    r   r(   gridsize.<locals>.codegenX   s    //w,ekk!I00"7C0B}}!))'8<<!#&w4))'<@@ $ 1r   r*   )r,   r   r%   r(   r8   s   &&  @r   gridsizerB   <   sC    " dE0011!$''
d
#COA <r   c                 @    \        \        P                  4      pR  pW3# )c                 0    \         P                  ! VR 4      # )warpsize)r   r2   )r#   r$   r%   r&   s   &&&&r   r(   _warpsize.<locals>.codegenn   s    ""7J77r   )r	   r   r   r,   r%   r(   s   &  r   	_warpsizerH   j   s    
EKK
 C8 <r   rE   r   )targetc                    R pV# )zS
The size of a warp. All architectures implemented to date have a warp size
of 32.
c                     \        4       # )N)rH   )mods   &r   getcuda_warpsize.<locals>.getz   s
    {r    )rL   rM   s   & r   cuda_warpsizerP   t   s    Jr   c                @    \        \        P                  4      pR pW3# )a  
Synchronize all threads in the same thread block.  This function implements
the same pattern as barriers in traditional multi-threaded programming: this
function waits until all threads in the block call it, at which point it
returns control to all its callers.
c                     R pVP                   p\        P                  ! \        P                  ! 4       R4      p\        P
                  ! WVV4      pVP                  VR4       V P                  4       # )zllvm.nvvm.barrier0rO   )moduler   FunctionTypeVoidTyper   get_or_insert_functioncallget_dummy_value)r#   r$   r%   r&   fnamelmodfntysyncs   &&&&    r   r(   syncthreads.<locals>.codegen   sU    $~~r{{}b1--d%@T2&&((r   )r	   r   nonerG   s   &  r   syncthreadsr_      s!     EJJ
C) <r   c                    a \        V\        P                  4      '       g   R # \        \        P                  \        P                  4      pV3R lpW43# )Nc                    < \         P                  ! \         P                  ! ^ 4      \         P                  ! ^ 4      34      p\        P                  ! VP
                  VS4      pVP                  WS4      # )    )r   rT   r1   r   rV   rS   rW   )r#   r$   r%   r&   r[   r\   rY   s   &&&&  r   r(   '_syncthreads_predicate.<locals>.codegen   sM    rzz"~

2/@A--gnndEJ||D''r   )r    r   Integerr	   i4)r,   	predicaterY   r%   r(   s   &&f  r   _syncthreads_predicaterg      s:    i//
EHHehh
'C(
 <r   c                    Rp\        WV4      # )z
syncthreads_count(predicate)

An extension to numba.cuda.syncthreads where the return value is a count
of the threads where predicate is true.
zllvm.nvvm.barrier0.popcrg   r,   rf   rY   s   && r   syncthreads_countrk      s     &E!)>>r   c                    Rp\        WV4      # )z
syncthreads_and(predicate)

An extension to numba.cuda.syncthreads where 1 is returned if predicate is
true for all threads or 0 otherwise.
zllvm.nvvm.barrier0.andri   rj   s   && r   syncthreads_andrm      s     %E!)>>r   c                    Rp\        WV4      # )z
syncthreads_or(predicate)

An extension to numba.cuda.syncthreads where 1 is returned if predicate is
true for any thread or 0 otherwise.
zllvm.nvvm.barrier0.orri   rj   s   && r   syncthreads_orro      s     $E!)>>r   N)llvmliter   numbar   r   
numba.corer   numba.core.errorsr   r   numba.core.typingr	   numba.core.extendingr
   
numba.cudar   numba.cuda.extendingr   r   r-   rB   rH   ModulerP   r_   rg   rk   rm   ro   rO   r   r   <module>ry      s       B ' 3   *	+  @ * *Z   ELL&
6B C  ( ? ? ? ? ? ?r   