
    9i1                     d   S SK r S SKrS SKJrJrJr  S SKrS SKJr	  S\ R                  4S jrS\SS4S jrS\ R                  4S jr    SS	\S
\S\\   S\S\\   S\\   S\4S jjr " S S5      r " S S5      r SS\\\4   S\\\      S\\\\S4   4   4S jjr SS\S\S\S\4S jjrg)    N)AnyOptionalUnion)_get_device_indexreturnc                      [         R                  S:X  a  [        R                  " S5      $ [        R                  " S5      $ )Nwin32z
nvcuda.dllzlibcuda.so.1)sysplatformctypesCDLL     Q/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/torch/cuda/_utils.py_get_cuda_libraryr      s,    
||w{{<(({{>**r   resultc                    U S:X  a  g [         R                  " 5       n[        5       nUR                  U [         R                  " U5      5        UR
                  b  UR
                  R                  5       OSn[        SU 35      e)Nr   Unknown CUDA errorCUDA error: )r   c_char_pr   cuGetErrorStringbyrefvaluedecodeRuntimeError)r   err_strlibcudaerror_messages       r   _check_cudar      sn    {ooG!GVV\\'%:;")--";AU  m_5
66r   c                  *   [        [        R                  R                  R	                  S5      S   5      n [
        R                  S:X  a  SU  S3/nOSU  3S/nU H  n [        R                  " U5      s  $    [        S5      e! [         a     M6  f = f)	N.r   r	   nvrtc64_z0_0.dllzlibnvrtc.so.zlibnvrtc.soz Could not find any NVRTC library)
inttorchversioncudasplitr
   r   r   r   OSError)major_version
nvrtc_libslib_names      r   _get_nvrtc_libraryr,       s    **005a89M
||w}oW-


 =/*

 	;;x(( 
 4
55  		s   B
BBkernel_sourcekernel_namecompute_capabilityheader_codecuda_include_dirsnvcc_optionsc           
        ^^ SSK n[        5       mSmS[        SS4UU4S jjnU R                  5       R	                  S5      (       d  SU  3n U(       a	  US-   U -   nOU nUR                  S	5      n	UcM  UR                  R                  UR                  R                  5       5      n
U
R                   U
R                   3n/ nUR                  S
U 3R                  5       5        U(       a+  U H%  nUR                  SU 3R                  5       5        M'     U(       a)  U H#  nUR                  UR                  S	5      5        M%     SSKJn  U Vs/ s H  oS:w  d  M
  UPM     nnUR                  U Vs/ s H  oR                  S	5      PM     sn5        [        U5      n[         R"                  U-  " U6 n[         R$                  " 5       nU" TR'                  [         R(                  " U5      U	U S3R                  5       SSS5      5        TR+                  UUU5      nUT:w  a  [         R,                  " 5       nTR/                  U[         R(                  " U5      5        [         R0                  " UR2                  5      nTR5                  UU5        [7        SUR2                  R9                  5        35      e[         R,                  " 5       nU" TR;                  U[         R(                  " U5      5      5        [         R0                  " UR2                  5      nU" TR=                  UU5      5        TR?                  [         R(                  " U5      5        UR2                  $ s  snf s  snf )a  
Compiles a CUDA kernel using NVRTC and returns the PTX code.

Args:
    kernel_source (str): The CUDA kernel source code as a string
    kernel_name (str): The name of the kernel function to compile
    compute_capability (str, None): The compute capability to target (e.g., "86").
                                       If None, will detect from current device.
    header_code (str, optional): Additional header code to prepend to the kernel source
    cuda_include_dirs (list, None): List of directories containing CUDA headers
    nvcc_options (list, None): Additional options to pass to NVRTC

Returns:
    str: The compiled PTX code
r   Nr   r   c                    > U T:w  ar  [         R                  " 5       nTR                  U [         R                  " U5      5        UR                  b  UR                  R                  5       OSn[        SU 35      eg )Nr   r   )r   r   nvrtcGetErrorStringr   r   r   r   )r   r   r   NVRTC_SUCCESSlibnvrtcs      r   check_nvrtc#_nvrtc_compile.<locals>.check_nvrtcT   so    ]"oo'G((g1FG ==, $$&) 
 m_=>> #r   z
extern "C"zextern "C" 
utf-8z--gpu-architecture=sm_z-I)COMMON_NVCC_FLAGSz--expt-relaxed-constexprz.cuzKernel compilation failed:
) 
torch.cudar,   r#   strip
startswithencoder&   get_device_propertiescurrent_devicemajorminorappendtorch.utils.cpp_extensionr<   extendlenr   r   c_void_pnvrtcCreateProgramr   nvrtcCompileProgramc_size_tnvrtcGetProgramLogSizecreate_string_bufferr   nvrtcGetProgramLogr   r   nvrtcGetPTXSizenvrtcGetPTXnvrtcDestroyProgram)r-   r.   r/   r0   r1   r2   r$   r8   full_sourcesource_bytespropsoptions	directoryoptionr<   flagnvrtc_compatible_flagsnum_optionsoptions_arrayprogreslog_sizelogptx_sizeptxr6   r7   s                            @@r   _nvrtc_compilerc   3   s   0  "#H M	?C 	?D 	? 	?  ++L99%m_5 !D(=8# %%g.L !

001J1J1LM %}U[[M: GNN+,>+?@GGIJ *INNR	{+2245 + "FNN6==12 # < +*6P.P*   NN5KL5KTKK(5KLM g,K__{2W=M ??D##LLm3&&(	
	 
&
&t[-
HC m??$''fll8.DE))(..9##D#.9#)):J:J:L9MNOO  H((v||H/EFG

%
%hnn
5C$$T3/0  d!3499S Ms   	M"M"4M'c                   L    \ rS rSrS\R
                  SS4S jrS\SS4S jrS	r	g)
_CudaModule   moduler   Nc                     Xl         0 U l        g N)_module_kernels)selfrg   s     r   __init___CudaModule.__init__   s    02r   name_CudaKernelc           	         XR                   ;   a  U R                   U   $ SSKJn  U" 5       n[        R                  " 5       n [        UR                  [        R                  " U5      U R                  UR                  S5      5      5        [        X@R                  5      nXPR                   U'   U$ ! [         a  n[        SU S35      UeS nAff = f)Nr   )r   r;   zNo kernel named 'z' in this module)rk   torch.cuda._utilsr   r   rI   r   cuModuleGetFunctionr   rj   r@   rp   r   AttributeError)rl   ro   r   r   funckernelerrs          r   __getattr___CudaModule.__getattr__   s    == ==&& 	8#% 	V++LL&dkk'6J
 !||4F"(MM$M 	V #4TF:J!KLRUU	Vs   A-B0 0
C:C

C)rk   rj   )
__name__
__module____qualname____firstlineno__r   rI   rm   strrx   __static_attributes__r   r   r   re   re      s/    3v 34 3V V Vr   re   c                       \ rS rSrSrS\R                  S\R                  SS4S jr     SS\\	\	\	4   S	\\	\	\	4   S
\
\   S\	S\
\   SS4S jjrSrg)rp      zL
Represents a compiled CUDA kernel that can be called with PyTorch tensors.
ru   rg   r   Nc                     Xl         X l        g ri   ru   rg   )rl   ru   rg   s      r   rm   _CudaKernel.__init__   s    	r   gridblockargs
shared_memstreamc                    SSK nUR                  R                  R                  5       nU(       d  / n/ n/ n	U GHv  n
[	        XR
                  5      (       a  U
R                  (       d1  U
R                  (       a  U
R                  5       (       d  [        S5      e[        R                  " U
R                  5       5      nUR                  U5        U	R                  [        R                  " U5      5        M  [	        U
[        5      (       a>  [        R                   " U
5      nU	R                  [        R                  " U5      5        GM  [	        U
["        5      (       a>  [        R$                  " U
5      nU	R                  [        R                  " U5      5        GMb  ['        S[)        U
5       35      e   [        R                  [+        U	5      -  " 5       n[-        U	5       H,  u  p[        R.                  " U
[        R                  5      X'   M.     Uc  SSKnUR                  R3                  5       n[5        UR7                  U R8                  US   US   US   US   US   US   UUR:                  US5      5        g)a  
Call the compiled CUDA kernel

Args:
    grid (tuple): Grid dimensions (grid_x, grid_y, grid_z)
    block (tuple): Block dimensions (block_x, block_y, block_z)
    args (list): List of arguments to pass to the kernel.
                 PyTorch tensor arguments will be automatically converted to pointers.
    shared_mem (int): Shared memory size in bytes
    stream (torch.cuda.Stream): CUDA stream to use. If None, uses current stream.
r   Nz?All tensor arguments must be CUDA tensors or pinned CPU tensorszUnsupported argument type:       )r$   r&   _utilsr   
isinstanceTensoris_cudais_cpu	is_pinned
ValueErrorr   rI   data_ptrrE   r   r#   c_intfloatc_float	TypeErrortyperH   	enumeratecastr=   current_streamr   cuLaunchKernelru   _as_parameter_)rl   r   r   r   r   r   r$   r   processed_argsc_argsargptrr   r   c_args_arrayis                   r   __call___CudaKernel.__call__   s   & 	**##557D 13C#||,,{{CJJ3==??$Y  ooclln5%%c*fll3/0C%%S)fll512C'' ..-fll734"=d3i[ IJJ- 2 #f+58'FA$kk#v?LO ( >ZZ..0F""		QQQaaa%%	
r   r   )r   r   r   r   Nr   N)rz   r{   r|   r}   __doc__r   rI   rm   tupler#   r   listr   r   r   r   r   r   rp   rp      s    V__ foo $  &/&/# $P
CcM"P
 S#s]#P
 tn	P

 P
 P
 
P
 P
r   rp   rb   kernel_namesc           
      \   SSK n[        5       n[        U [        5      (       a  U R	                  S5      n [
        R                  " 5       nUR                  R                  5       nU   [        UR                  [
        R                  " U5      U 5      5        SSS5        U(       d  [        U5      $ 0 nU Hc  n[
        R                  " 5       n[        UR                  [
        R                  " U5      XGR	                  S5      5      5        [        X5      Xg'   Me     U$ ! , (       d  f       N= f)a  
Loads a CUDA module from PTX code and returns a module object that can access kernels.

Args:
    ptx (bytes or str): The PTX code to load
    kernel_names (list, optional): List of kernel names to extract from the module.
                                  If None, will return a module object with __getattr__.

Returns:
    object: If kernel_names is None, returns a module object with __getattr__ to access kernels.
           If kernel_names is provided, returns a dict mapping kernel names to _CudaKernel objects.
r   Nr;   )r=   r   r   r~   r@   r   rI   r&   r   r   cuModuleLoadDatar   re   rs   rp   )	rb   r   r$   r   rg   r   kernelsro   ru   s	            r   _cuda_load_moduler   (  s        !G #sjj! __FZZ&&(F	G,,V\\&-A3GH 
 6"" G ''T"FKK,@	

 $D1  N! 
s   &0D
D+deviceoptional	allow_cpuc                    [        U [        5      (       a  U $ [        U [        5      (       a  [        R                  " U 5      n [        U [        R                  5      (       aD  U(       a  U R
                  S;  a  [        SU  35      eOU R
                  S:w  a  [        SU  35      e[        R                  R                  5       (       d5  [        U [        R                  R                  5      (       a  U R                  $ [        XU5      $ )a  Get the device index from :attr:`device`, which can be a torch.device object, a Python integer, or ``None``.

If :attr:`device` is a torch.device object, returns the device index if it
is a CUDA device. Note that for a CUDA device without a specified index,
i.e., ``torch.device('cuda')``, this will return the current default CUDA
device if :attr:`optional` is ``True``. If :attr:`allow_cpu` is ``True``,
CPU devices will be accepted and ``-1`` will be returned in this case.

If :attr:`device` is a Python integer, it is returned as is.

If :attr:`device` is ``None``, this will return the current default CUDA
device if :attr:`optional` is ``True``.
)r&   cpuz(Expected a cuda or cpu device, but got: r&   z!Expected a cuda device, but got: )r   r#   r~   r$   r   r   r   jitis_scriptingr&   idx_torch_get_device_index)r   r   r   s      r   r   r   X  s      &#&#f%&%,,''{{/1 #KF8!TUU 2[[F"@IJJ99!!##fejj//00::"6Y??r   )N NNri   )FF)r   r
   typingr   r   r   r$   torch._utilsr   r   r   r   r#   r   r,   r~   r   bytesrc   re   rp   dictr   boolr   r   r   <module>r      sK    
 ' '  F+6;; +	7 	7 	76FKK 6, )-(,#'yyy !y 	y
  ~y 4.y yxV V:Y
 Y
z AE-	sEz	-*249*=-
;S-/001-b <A@@@48@@r   