
    9iqF              	       f   S SK r S SKrS SKJr  S SKJrJr  S SKJr  S SK	J
r
Jr  S SKJr  S SKJr  S SKJrJrJr  S SKrS S	KJr  / S
Qr\" \5      r " S S\\5      r\" SS9 " S S5      5       r\S\S\\   S\S   4S j5       rS\S\SS4S jrS\ \   SS4S jr!S\ \   SS4S jr"S\S\S\ \   4S jr#S\S\ \   4S jr$S\S\ \   4S jr%S\S\ \   4S jr&S\S\ \   4S jr'\" S5      r(\" S5      r)S \\)   S!\\)/\(4   S\*\(\ \)   4   4S" jr+S#\S\ \   4S$ jr,S#\S\ \   4S% jr-S&\S\ \   4S' jr.S&\S\ \   4S( jr/S\4S) jr0S\S\4S* jr1S&\S\ \   4S+ jr2S&\S\4S, jr3S-\S\4S. jr4S&\S\4S/ jr5S0\S\ \   4S1 jr6S2\\   S\4S3 jr7S\ \   4S4 jr8S5\S\ \   4S6 jr9S\ \   4S7 jr:g)8    N)defaultdict)IterableIterator)contextmanager)asdict	dataclass)Enum)	getLogger)CallableOptionalTypeVar)signpost_event)AffinityMode6maybe_temporarily_apply_numa_binding_to_current_threadNumaOptionsc                   (    \ rS rSrSrSrSrSrSrSr	g)	r      zK
See behavior description for each affinity mode
in torch.distributed.run.
nodesocket	exclusivezcore-complex N)
__name__
__module____qualname____firstlineno____doc__NODESOCKET	EXCLUSIVECORE_COMPLEX__static_attributes__r       R/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/torch/numa/binding.pyr   r      s    
 DFI!Lr"   r   T)frozenc                   0    \ rS rSr% \\S'    Sr\\S'   Srg)r   $   affinity_modeF!should_fall_back_if_binding_failsr   N)	r   r   r   r   r   __annotations__r(   boolr!   r   r"   r#   r   r   $   s     /4%t3r"   r   	gpu_indexnuma_optionsreturnc              #   \   #    Uc  Sv   g[        5       n[        XS9  Sv   [        US9  g7f)z
1. Applies NUMA binding to the current thread, suitable for the thread
which will be interacting with GPU gpu_index.
2. Resets to the original CPU affinity before exiting the context manager.
Nr+   r,   logical_cpu_indices)+_get_allowed_cpu_indices_for_current_thread%_apply_numa_binding_to_current_thread$_bind_current_thread_to_logical_cpus)r+   r,   original_logical_cpu_indicess      r#   r   r   2   s7      #N#P ) 
(8s   *,c           	      |   U [        U5      S.n[        R                  SU5         [        XS9n[        R                  S[	        U5      5        [        US9  [        R                  S[	        U5      5        [        US9  [        R                  S[	        U5      5        [        SS0 UES	[	        U5      0ES
9  g ! [         ax    [        SS0 UES[        R                  " 5       0ES
9  [        R                  SU5        UR                  (       a+  [        R                  S[        R                  " 5       5         g e f = f)Nr/   z0Attempting to apply NUMA binding, given input %rz0Computed logical_cpu_indices=%s for NUMA bindingr0   z1Validated logical_cpu_indices=%s for NUMA bindingz=Successfully bound to logical_cpu_indices=%s for NUMA bindingnuma_bindingapply_successr1   )categoryname
parametersapply_exception	tracebackz)Failed to apply NUMA binding for input=%rzHContinuing executing without applying NUMA binding, despite exception %s)r   loggerinfo_get_logical_cpus_to_bind_to_get_ranges_str_from_ints%_raise_if_logical_cpu_indices_invalidr4   r   	Exceptionr=   
format_exc	exceptionr(   warning)r+   r,   kwargsr1   s       r#   r3   r3   I   sF    |,F KKBFK-:
 	>%&9:	

 	.BUV?%&9:	

 	-ATUK%&9:	

 	# %'@AT'U	
  #"Y113	
 	DfM99NNZ$$& !s   BB9 9A>D;9D;r1   c                 (    U (       d  [        S5      eg )Nz+Must bind to a non-empty set of CPU indices)RuntimeErrorr0   s    r#   rB   rB      s    HII r"   c                 2    [         R                  " SU 5        g Nr   )ossched_setaffinityr0   s    r#   r4   r4      s    /0r"   c                 |   UR                   [        R                  :X  a  [        U S9nU$ UR                   [        R                  :X  a  [        U S9nU$ UR                   [        R                  :X  a  [        U S9nU$ UR                   [        R                  :X  a  [        U S9nU$ [        SUR                    S35      e)z
Args:
    gpu_index: The index of the GPU that will be used by the subprocess.
        Example: 0
    numa_options: See NumaOptions for details.

Returns:
    Set of logical CPU indices to bind to.
r+   zAffinity mode z not supported.)r'   r   r   !_node_get_logical_cpus_to_bind_tor   #_socket_get_logical_cpus_to_bind_tor   &_exclusive_get_logical_cpus_to_bind_tor    )_core_complex_get_logical_cpus_to_bind_to
ValueError)r+   r,   logical_cpuss      r#   r@   r@      s     !!\%6%6689M  
	#	#|':':	::YO  
	#	#|'='=	==	R  
	#	#|'@'@	@@9U  >,*D*D)E_UVVr"   c                 &    [        U S9n[        US9$ )z%
Core logic of 'node' numa strategy.
rO   numa_node_index)"_get_numa_node_index_for_gpu_index._get_allowed_logical_cpu_indices_for_numa_node)r+   rX   s     r#   rP   rP      s     99MO9' r"   c                     [        U S9n[        US9n[        US9n[        5       nU H  nUR	                  [        US95        M     U$ )z'
Core logic of 'socket' numa strategy.
rO   rW   )socket_index)rY   _get_socket_index_for_numa_node'_get_numa_node_indices_for_socket_indexsetupdaterZ   )r+   numa_node_index_of_gpur\   numa_node_indicesrU   rX   s         r#   rQ   rQ      sb     @)T2.L @! 5L,: /	
 - r"   c                 J   [        U S9n[        US9n[        U5      nUR                  U 5      n[	        US9n[        US 5      n[        [        UR                  5       5      5      n[        U5      [        U5      -  n[        U5      [        U5      -  nUS:  a,  [        S[        U5       SU< S3S[        U5       S	3-   5      eX6-  [        X75      -   nUU-   X7:  a  SOS
-   n	[        UR                  5       5      X  V
Vs1 s H  n
U
  H  nUiM     M     nn
nU$ s  snn
f )z*
Core logic of 'exclusive' numa strategy.
rO   rW   c                 &    [        [        U S95      $ Nlogical_cpu_index)min6_get_logical_cpu_indices_sharing_same_physical_core_asrf   s    r#   <lambda>8_exclusive_get_logical_cpus_to_bind_to.<locals>.<lambda>   s    #B"3#
r"      zThere are only z# physical cores on numa_node_index=,z but there are z% GPUs associated with this NUMA node.r   )rY   _get_gpu_indices_for_numa_nodesortedindexrZ   	_group_bydictitemslenrI   rh   listvalues)r+   rX   gpu_indicesoriginal_gpu_relative_indexallowed_logical_cpu_indices,physical_core_to_allowed_logical_cpu_indicesnum_physical_cores_per_gpu(num_gpus_to_give_one_extra_physical_corestartendr1   rg   $logical_cpu_indices_for_original_gpus                r#   rR   rR      s    99MO0QK%K"-"3"3I">"P'# 4=#	
40 48;AACD40 "%4"	[	"
 0340K0, "A%c"NOPPtdscuuvwK 011VWX
 	
 (Ds#H E 	
$	% +U 	
  $(8??A$

$,$ "5	 	 "5	 	$ ) , 0/,s   Dc                 &   [        U S9n[        US9n[        U5      nUR                  U 5      n[	        US9n[        US 5      n[        [        UR                  5       S S95      nU[        U5      -  n[        UR                  5       5      U   nU$ )z
Core logic of 'core-complex' numa strategy.

Each GPU is assigned a full core complex (group of cores sharing L3 cache)
within its affined NUMA node.
rO   rW   c                 &    [        [        U S95      $ re   )rh   1_get_logical_cpus_sharing_same_max_level_cache_asrf   s    r#   rj   ;_core_complex_get_logical_cpus_to_bind_to.<locals>.<lambda>%  s    #="3#
r"   c                 *    [        U S   5      * U S   4$ )Nrl   r   )rt   )items    r#   rj   r   1  s    s47|mT!W5r"   )key)rY   rn   ro   rp   rZ   rq   rr   rs   rt   ru   rv   )r+   rX   rw   rx   ry   .max_level_cache_to_allowed_logical_cpu_indicescache_index_for_original_gpur   s           r#   rS   rS     s     99MO0QK%K"-"3"3I">"P'# 6?#	
62 6::@@B 6		
62 $?6B $  ,06==?,",$( 0/r"   KVrv   get_keyc                 l    [        [        5      nU  H  nU" U5      nX$   R                  U5        M      U$ )z*
Groups elements with same key into sets.
)r   r_   add)rv   r   key_to_valuesvaluer   s        r#   rq   rq   C  s:     -8,<Menu%  r"   rg   c                     SU  S3n[        U5       n[        UR                  5       5      sS S S 5        $ ! , (       d  f       g = f)N/sys/devices/system/cpu/cpuz/topology/thread_siblings_list)open_get_set_of_int_from_ranges_strread)rg   "thread_siblings_list_absolute_pathfs      r#   ri   ri   N  s?     &&7%88VW ' 
0	1Q.qvvx8 
2	1	1s	   5
Ac                 ~   SU  S3nSn[        5       n[        R                  " U5       GHZ  nUR                  S5      (       a  USS  R	                  5       (       d  M4  [        R
                  R                  X5      n[        R
                  R                  US5      n[        U5       nUR                  5       R                  5       S;  a   S S S 5        M   S S S 5        [        R
                  R                  US5      n[        U5       n	[        U	R                  5       5      n
S S S 5        W
U::  a  GM  U
n[        R
                  R                  US	5      n[        U5       n[        UR                  5       5      nS S S 5        GM]     U$ ! , (       d  f       N= f! , (       d  f       N~= f! , (       d  f       GM  = f)
Nr   z/cacherp      type>   DataUnifiedlevelshared_cpu_list)r_   rL   listdir
startswith	isdecimalpathjoinr   r   stripintr   )rg   cpu_cache_dir_absolute_path	max_level$logical_cpus_sharing_max_level_cacheentrycache_index_absolute_pathtype_absolute_path	type_filelevel_absolute_path
level_filer   shared_cpu_list_absolute_pathshare_cpu_list_files                r#   r   r   X  sw    &&7%8?   I+.5(78((ab	0C0C0E0E$&GGLL1L$T!  WW\\*CVL$%~~%%'/BB &%B & !ggll+DgN%&*
)*E 'I	(*%'8)
% /04G3R#((*40 10+ 94 0/' &%
 '& 10s$   '$F
	F!F,

F	
F)	,
F<	rX   c                 0    [        U S9n[        5       nX-  $ NrW   )0_get_cpu_indices_for_numa_node_MAYBE_NOT_ALLOWEDr2   )rX   all_cpu_indicesallowed_cpu_indicess      r#   rZ   rZ   ~  s$    F'O FG00r"   c                     SU  S3n [        U5       nUR                  5       nSSS5        [	        W5      $ ! , (       d  f       N= f! [         a  n[        SU < S35      UeSnAff = f)z
Returns:
    Indices of all CPUs associated with numa_node_index. However, the list
    is not filtered based on whether the thread is allowed to use them.
z/sys/devices/system/node/nodez/cpulistNz:Could not determine CPUs corresponding to numa_node_index=.)r   r   FileNotFoundErrorrI   r   )rX   cpulist_absolute_pathr   cpu_range_stres        r#   r   r     sv     <O;LHU'(AFFHM ) +=99 )( I8J!L
	s1   A 7A 
AA A 
A(A##A(c                  >    [         R                  R                  5       $ )N)torchcudadevice_countr   r"   r#   _get_gpu_countr     s    ::""$$r"   c                 ^   [         R                  R                  U 5      nUR                  nUR                  nUR
                  nUS SUS SUS S3nSU S3n[        U5       n[        [        UR                  5       R                  5       5      S5      sS S S 5        $ ! , (       d  f       g = f)N04x:02xz.0z/sys/bus/pci/devices/z
/numa_noder   )r   r   get_device_propertiespci_domain_id
pci_bus_idpci_device_idr   maxr   r   r   )r+   device_propertiesdomainbusdevicepci_addrpci_numa_node_absolute_pathr   s           r#   rY   rY     s    

88C,,F

&
&C,,F Qs3iqB7H$9(:"N	)	*a 3qvvx~~'(!,	 
+	*	*s   #1B
B,c                 r    [        [        5       5       Vs1 s H  n[        US9U :X  d  M  UiM     sn$ s  snf )NrO   )ranger   rY   )rX   r+   s     r#   rn   rn     s>     ~/00I-	BoU 	0  s   44c                 &    [        U S9n[        US9$ NrW   )	cpu_index)._get_arbitrary_allowed_cpu_index_for_numa_node_get_socket_index_for_cpu)rX   arbitrary_cpu_indexs     r#   r]   r]     s    H' %/BCCr"   r   c                     SU  S3n [        U5       n[        UR                  5       R                  5       5      sS S S 5        $ ! , (       d  f       g = f! [         a  n[        SU < 35      UeS nAff = f)Nr   z/topology/physical_package_idz)Could not determine socket for cpu_index=)r   r   r   r   r   rI   )r   package_id_absolute_pathr   r   s       r#   r   r     sm    
%i[0MN R*+qqvvx~~'( ,++ RGYLIJPQQRs3   A 'A	A 
AA A 
A4A//A4c                 &    [        [        U S95      $ r   )rh   rZ   rW   s    r#   r   r     s    6W r"   
ranges_strc                 J   [        5       nU R                  S5       H  nUR                  5       nU(       d  M  SU;   aG  UR                  S5      u  p4[        U5      [        U5      peUR	                  [        XVS-   5      5        Mi  UR                  [        U5      5        M     U$ )z
Util for parsing a string of int ranges, as in a sysfs file.

Args:
    ranges_str: E.g., "0-2,4,6-7"

Returns:
    E.g., {0, 1, 2, 4, 6, 7}
rm   -rl   )r_   splitr   r   r`   r   r   )r   ints	range_str	start_strend_strr}   r~   s          r#   r   r     s     UD%%c*	OO%	)!*!5IYW3KKe1W-.HHS^$ + Kr"   r   c                 H   U (       d  g[        U 5      n/ nUS   =p4USS  H@  nXTS-   :X  a  UnM  X4:X  a  UR                  U 5        OUR                  U SU 35        U=p4MB     X4:X  a  UR                  U 5        OUR                  U SU 35        SR                  U5      $ )z
Convert a set of integers to a compact string with ranges.

Args:
    ints: E.g., {0, 1, 2, 4, 6, 7}

Returns:
    E.g., "0-2,4,6-7"
 r   rl   Nr   rm   )ro   appendr   )r   sorted_intsrangesr}   prevnums         r#   rA   rA     s     ,KFq>!E12(?D})q/0ED  }!q'(88Fr"   c                      [        S5       n U R                  5       nS S S 5        [        W5      $ ! , (       d  f       N= f)Nz!/sys/devices/system/node/possible)r   r   r   )r   possible_nodes_strs     r#   !_get_systemwide_numa_node_indicesr     s5    	1	2aVVX 
3 ++=>> 
3	2s   0
>r\   c                     [        5       n[        5       nU H,  n[        US9nU [        US9:X  d  M  UR	                  U5        M.     U$ r   )r   r_   r   r   r   )r\   systemwide_numa_node_indicesmatching_numa_node_indicesrX   r   s        r#   r^   r^     sR    #D#F !$7L+
 4?RSS&**?; 8 &%r"   c                  .    [         R                  " S5      $ rK   )rL   sched_getaffinityr   r"   r#   r2   r2   !  s    ""r"   );rL   r=   collectionsr   collections.abcr   r   
contextlibr   dataclassesr   r   enumr	   loggingr
   typingr   r   r   r   torch._utils_internalr   __all__r   r>   strr   r   r   r   r3   r_   rB   r4   r@   rP   rQ   rR   rS   r   r   rr   rq   ri   r   rZ   r   r   rY   rn   r]   r   r   r   rA   r   r^   r2   r   r"   r#   <module>r      s   	  # . % )   . .  0 
8		"3 	" $
4 
4 
4 %-k%:d^ ,66%06	6rJ#c( Jt J
1S 1d 1
  	X	8C CH c c#h .C0 C0S C0L,0C ,0CH ,0^ CLCLhqk HaS!V,< aQi 99X9#0#0X#0L1s 1sSVx 1::X:&% %-S -S -$s s3x D D DRC RC Rs s  C 0!HSM !c !H?3s8 ?&S &SX &#SX #r"   