
    9i0                       % S SK r S SKrS SKJrJrJrJrJrJr  S SK	r	S SK
r	S SKJr  S SKJrJrJr  \ R"                  R%                  SS5      S:H  rSrS\\   4S	 jrS\\   4S
 jrS\\   4S jrS\\   4S jrS\\   4S jrS\4S jrS\4S jr\ R"                  R%                  SS5      S:H  rSrSrSr \" 5       (       a  SOSr!Sr"\#\$S'   \" SSSS9r%\\$S'   Sr&\\$S'   \" 5       r'\\   \$S'   \" 5       r(\\   \$S'   \" SSSS9r)\\$S'   Sr*\\$S'   \" 5       r+\\   \$S '   \" 5       r,\\   \$S!'   \" S"S#9r-\\$S$'   Sr.\\$S%'   0 r/\0\1\14   \$S&'   Sr2\\#   \$S''   S(r3\S)   \$S*'   \ R"                  R%                  S+S,5      S:H  r4\\$S-'   \ R"                  R%                  S.S,5      S:H  r5\\$S/'   \ R"                  R%                  S0S,5      S:H  r6\\$S1'   \" 5       (       + r7\\$S2'   \ R"                  R%                  S3S5      S:H  r8Sr9Sr:\ R"                  R%                  S4S5      S:H  r;\ R"                  R%                  S55      S:H  r<\ R"                  R%                  S6S5      S:H  r=\ R"                  R%                  S7\" 5       (       a  S,OS5      S:H  r>Sr?Sr@SrA\ R"                  R%                  S8S,5      S:H  rB\ R"                  R%                  S95      S:H  rCSrD\ R"                  R%                  S:S;5      rE\S<   \$S='   SrFSrG\" 5       rHSrISrJSrKSrL\	R                  R                  R                  \$S>'   SrP\	R                  R                  R                  \$S?'   SrQ\	R                  R                  R                  \$S@'   SrS\	R                  R                  R                  \$SA'   SrT\	R                  R                  R                  \$SB'   SrU\\\	R                  R                  R                  /S4      \$SC'   SrY\\\ZSD   /\ZSD   4      \$SE'   Sr[\\\ZSD   /\ZSD   4      \$SF'   Sr\Sr]Sr^Sr_Sr`0 ra\0\1\0\1\4   4   \$SG'   0 rb\0\1\0\1\4   4   \$SH'   Src\ R"                  R%                  SIS5      S:H  rdSreSrfSSJSKSSL.rg\0\1\4   \$SM'   SNrh\SO   \$SP'   Sri/ SQQrj\Z\\1\\ZSD   /\ZSD   4   4      \$SR'   Srk\\#   \$SS'   SrlSrm\\$ST'   \ R                  " SU5      =roc  SO\#" \o5      rp\\#   \$SV'   \ R                  " SW5      =roc  SO\#" \o5      rq\\#   \$SX'   SYrr\SZ   \$S['   Srs\\\#/\#4      \$S\'   SYrt\S]   \$S^'   Sru\\\#/\#4      \$S_'   S`rvSrw\\$Sa'   SbrxScry\" SSdSeSf9rz\\$Sg'   \ R"                  R%                  Sh5      S:H  r{\ R"                  R%                  Si5      S:H  r|\ R"                  R%                  Sj5      S:H  r}Skr~\\#   \$Sl'   \ R"                  R%                  SmS5      S:H  r\ R"                  R%                  SnS5      S:H  r\ R"                  R%                  So\" 5       (       d  SOS,5      S:H  r\\$Sp'   / r\Z\1   \$Sq'   \" SrSsSS9r\\$St'   / r\Z\#   \$Su'   \ R"                  R%                  SvSw5      GR                  5       r\ R"                  R%                  SxSy5      GR                  5       r\ R"                  R%                  SzS{5      GR                  5       r\S|   \$S}'   \ R"                  R%                  S~S{5      GR                  5       r\S|   \$S'   SrSrSr\ R"                  R%                  S5      S:H  r\ R"                  R%                  S5      S:H  rSrSrSr\ R"                  R%                  S5      S:H  r\ R"                  R%                  S5      S:H  r\ R"                  R%                  S5      S:H  r\#" \ R"                  R%                  SS5      5      r\ R"                  R%                  SS5      r\ R"                  R%                  SS5      r\ R"                  R%                  SS,5      S:H  rS\1S\4S jrS\1S\4S jrS\1S\4S jr\ R"                  R%                  SS{5      r\	GR:                  GR<                  (       d  SOS,r\ R"                  R%                  S\5      S:H  r\ R"                  R%                  SS,5      S:H  r\ R"                  R%                  SS5      S:H  r\ R"                  R%                  S5      S:H  rSrSrSrSr\\#   \$S'   SrSr\ R"                  R%                  S5      S:H  rSr\ R"                  R%                  S5      S:H  r\\$S'   \ R"                  R%                  S5      S:H  r\\$S'   \ R"                  R%                  SS5      r\ R"                  R%                  SS,5      S:H  r\\$S'   Skr\ R"                  R%                  SS5      S:H  rSKrSrSrSrSrSrSrSrSr\#" \ R"                  R%                  SS 5      5      r\ R"                  R%                  SS,5      S:H  rSrSrSrSrSrSrSKrSKrSrSrSr\ R"                  R%                  SS,5      S:H  rS\	GR                  ;   =(       d    S\	GR                  ;   r\" 5       =(       d    \r\ R"                  R%                  SS5      S:H  rSr\\1   \$S'   Sr\\1   \$S'   S\14S jr\" 5       r\1\$S'   Sr\#\$S'   \" SSSS9r\\$S'   \" SSS9r\\$S'   SrScrSS/r\Z\\S   \14      \$S'   Sr\\$S'    " S S5      rS\4S jrS\#4S jr\" 5       (       a  SO\" 5       r\\#   \$S'   \" SSSS9r\\$S'   \" 5       r\\$S'   \" SSSS9r\\$S'   \ R"                  R%                  SS,5      S:H  r\\$S'   \\1   \$S'   \" 5       (       an   S SKJr  \(       aL  \GR                  " \ GR                  GR                  \GR                  S\ GR                  5      S5      5      rO\GR                  " S5      rOSrSkr\ R"                  R%                  SS5      S:H  r\ R"                  R%                  SS5      S:H  rSrSrSrSrSrSrSrSrSr\\$S'   \ R"                  R%                  SS,5      S:H  rSrSrSrSr\ R"                  R%                  SS5      r\S:g  r\S:X  a  SO\r\ R"                  R%                  SS5      r\\1   \$S'   \ R"                  R%                  S5      S:H  Gr SGr\ R"                  R%                  SS,5      S:H  Gr\\$S'   SGr\\$S'   SGr\\$S'   SGr\\$S'   SGr\\$S'   SGr\\$S'   SGr\\$S'   \ R"                  R%                  SS,5      S:H  Gr	\ R"                  R%                  SS,5      S:H  Gr
\\$S'   SGr\\$S'   0 Gr\0\1\0\1\4   4   \$S'   S\\1   4S jGr\" SSS9Gr\1\$S'    " S S5      Gr " S S5      Gr " S S5      Gr " S S5      Gr " S S5      GrSGr\S   \$S'   SGr\S   \$S'    " S S5      Gr " S S5      Gr/ SQGr\Z\1   \$S'   / SQGr\Z\1   \$S'   / Gr\Z\\	GR6                  \	GR6                  \	GR6                  /S4      \$S'    " S GS 5      Gr\(       a  S GSGK7  \" \GR<                  G\   5        g! \\4 a    Sr GNf = f(      N)AnyCallableLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinstall_config_moduleTORCHINDUCTOR_INPLACE_PADDING1Freturnc                      [        S5      $ )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr        V/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBr   c                      [         R                  R                  S5      S:X  a  g[         R                  R                  S5      S:X  a  gg )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr      s7    	zz~~01S8	zz~~01S8r   c                      [        S5      $ )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr!      r   r   c                      [        S5      $ )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr$      s    IJJr   c                  D    [        S[        5       (       d  S5      $ S 5      $ )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r   r	   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr'   #   s(    9KK %) r   c                      Sn S[         R                  ;   a"  [         R                  R                  S5      S:H  $ [        5       (       a#  [        R
                  R                  S5      nX:*  $ g)N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r	   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversions     r   static_cuda_launcher_defaultr1   *   s]    #$ /2::=zz~~FG3NN	''<<;
 66 r   c                      Sn S[         R                  ;   a"  [         R                  R                  S5      S:H  $ [        5       (       a%  Sn[        R
                  R                  U5      nX :*  $ g)Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr+   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer0   s      r   prologue_fusion_enabledr6   9   s[    %&"&"**4zz~~=>#EE	<''<<WE88r   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_ranki  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cacheTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITEnon_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rL   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrappercpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAXTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)noner^   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r=   tritonatenrt   mixed_mm_choice)reorder_compute_for_overlap
sink_waitsraise_comms'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit(reorder_iterative_debug_memory_recompute!PYTORCH_REORDER_COLLECTIVES_LIMIT(reorder_iterative_debug_limit_to_reorderPYTORCH_SINK_WAITS_LIMIT(sink_waits_iterative_debug_limit_to_sinkr_   )r_   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinator)r_   r   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinatorr=   !runtime_estimations_mms_benchmarki,     *TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r=   r<   r;   use_experimental_benchmarkerTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM
   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEMTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitioncustom_should_partition_opsz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacei    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mm'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 <    [        U 5      =(       d    [        U 5      $ )N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr   ?  s     &A*;D*AAr   c                 n    U [         R                  R                  R                  R	                  S5      ;   $ N,)r,   	_inductorconfigautoheuristic_collectsplitr   s    r   r   r   C  s(    5??))??EEcJJJr   c                 n    U [         R                  R                  R                  R	                  S5      ;   $ r   )r,   r   r   autoheuristic_user   r   s    r   r   r   G  s(    5??));;AA#FFFr   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusion'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   TORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL%TORCHINDUCTOR_EMULATE_PRECISION_CASTSdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesc                  z    S[         R                  ;   a  [         R                  S   n OSn U S;   d
   SU  35       eU $ )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr     sR    #rzz1zz">?#   / 
 ~.	/ 
 r   worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSEr<   r=   log_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   2    \ rS rSr% Sr\\S'   Sr\\S'   Sr	g)_collectivei7  Fauto_selecti   #one_shot_all_reduce_threshold_bytesr   N)
__name__
__module____qualname____firstlineno__r   bool__annotations__r   int__static_attributes__r   r   r   r   r   7  s    K/9'9r   r   c                  P    Sn Sn[         R                  R                  U5      nX:  $ )a  
TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
knob to enable / disable. The justknob should not be performed at import, however.
So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
async_compile.py.
rn   z0pytorch/inductor:enable_parallel_compile_version)r,   r-   r.   )ENABLE_PARALLEL_COMPILE_VERSIONr5   r0   s      r   #parallel_compile_enabled_internallyr   <  s.     '(#@G##88AG*55r   c                  X   SSK n U R                  [        5      nS[        R                  ;   a0  [        [        R                  S   5      nUR                  SU5        U$ [        R                  S:X  a  SnUR                  S5        U$ [        5       (       a$  [        5       (       d  SnUR                  S5        U$ [        [        S	5      (       a  [        [        R                  " S5      5      O[        R                  " 5       nU(       d   e[        S
U5      nUR                  SU5        U$ )a	  
Here are the precedence to decide compile_threads
1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
   setting this to 1 to make pdb happy.
2. Set to 1 if it's win32 platform
3. decide by the number of CPU cores
r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32rn   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr	   r   hasattrlenr   	cpu_countmin)r   logcompile_threadsr  s       r   decide_compile_threadsr  J  s     

H
%C&"**4bjj)HIJ4oF" ! 
	 56  
@BB56  r.// $$Q'( 	
 yb),,o>r   r  z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pooluse_static_cuda_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmassume_aligned_inputs.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tablec                      S n [        5       (       aH  [        R                  R                  SS 5      n[        R                  R                  SS5      nUb  SU 3n U $ )NMAST_HPC_JOB_NAME	ROLE_RANKr   r8   )r	   r   r   r   )log_locmast_job_nameglobal_ranks      r   get_worker_log_pathr.  )  sN    G{{

':DAjjnn[#6$Ek]SGNr   TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                      \ rS rSr% SrSr\R                  R                  SS5      S:H  r	\R                  R                  SS5      S:H  r
Sr\\   \S	'   \" \R                  R                  S
S5      5      rS\R                  R                  S\R"                  S:X  a  SOS5      4r\\S   \4   \S'   \R                  R                  SS5      S:H  r\R                  R                  SS5      S:H  rSr\\   \S'   Sr\\   \S'   \" S5      r\\   \S'   Sr\S   \S'   \" \R                  R                  SS5      5      r\R                  R                  SS5      S:H  r\R                  R                  SS5      S:H  r \R                  R                  SS5      r!\R                  R                  S S5      S:H  r"S!r#\" \R                  R                  S"S5      5      r$\R                  R                  S#S5      r%\R                  R                  S$S5      r&S%r'S!r(\R                  R                  S&S5      S:H  r)S!r*\R                  R                  S'S5      S:H  r+\R                  R                  S(S5      S:H  r,S)r-g)*cppi<  zi
Settings for cpp backend.
This class provides a centralized location for managing cpp backend settings.
$TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr,   rA  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAYr   ).r   r   r   r   __doc__threadsr   r   r   no_redundant_loopsdynamic_threadsr6  r   r   r   min_chunk_sizer   r   r;  tupler   strenable_kernel_profileweight_prepackr>  r?  r   r@  r   rD  max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayr   r   r   r   r2  r2  <  s    G
 	

=sCsJ  jjnn%H#NRUUO!GXc]!(JERSN 	


u3<<8+CiO&Cwt}c!	"  	

@#F#M 
 ZZ^^$FLPSSN
 37 (3-637!8C=7 "22L!MJM 	 wHI 
 "%


EtL" 	

FLPSS   	

FLPSS   +-**..?+' 	

BCHCO 
 $)  BJJNN+PRUVW **..)PRVW **..)PRVW   !
 	

=sCsJ 
  % 	

>DK  	

FLPSS  r   r2  c                      \ rS rSr% Sr\R                  R                  S5      S:H  rSr	Sr
Sr\\\\\\S4   4         \S	'   SrSr\" 5       (       a  SOSrS
rSr\\   \S'   SrSr\" SSS9r\\S'   SrSrSrSrSr \R                  R                  S\" 5       (       d  SOS5      S:H  r!\\S'   Sr"\\   \S'   Sr#\\S'   Sr$Sr%Sr&\\   \S'   Sr'\\S'   Sr(\\S'   Sr)Sr*\R                  R                  SS5      S:H  r+\R                  R                  SS5      S:H  r,Sr-\.S   \S'   \R                  R                  SS5      S:H  r/\R                  R                  SS5      S:H  r0Sr1\" \R                  R                  SS5      5      r2\.S    \S!'   \R                  R                  S"S5      S:H  r3S#r4Sr5S$r6\\S%'   Sr7Sr8Sr9\\:   \S&'   Sr;\R                  R                  S'S5      S:H  r<\R                  R                  S(S5      S:H  r=Sr>\" \R                  R                  S)S*5      5      r?\" \R                  R                  S+S,5      5      r@S-rAg).ru   i  z&
Config specific to codegen/triton.py
TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr  2   "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr   cudagraph_or_error&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductions!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESrA  rB  rD  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   rn   r)      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16      spill_thresholdr>  ENABLE_PERSISTENT_TMA_MATMULTORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32r   )Br   r   r   r   rR  r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsrl  r   rW  r   r   r   slow_path_cudagraph_asserts!cudagraph_trees_history_recordingr	    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitrn  force_cudagraph_syncforce_cudagraphs_warmupr
   rp  r   fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingrr  rs  rt  autotune_pointwiseautotune_cublasLtru  rv  rw   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesrD  r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsr~  divisible_by_16min_split_scan_rblockstore_cubinr  use_block_ptruse_tensor_descriptorr>  rX  codegen_upcast_to_fp32enable_persistent_tma_matmulskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_thresholdr   r   r   r   ru   ru     s(   
  :;sBJ O %*! MQXeE#uS#X2F,G&HIP #' ).% 1:u$ +.' 9;&: ! $  &9   #( "   N 	

4cRU	
 	 d   $Ix}# #d"   04htn3
 ).- "OT! (,$'+$ 	

:C@CG  	

?EL  	 wHI  	

<cBcI  	

=sCsJ 
 $)  ),


3S9)L'*% 
 jjnn%DcJcQO   K OS M "
 37 (3-6 "
 	

5s;sB ! JJNN#:C@CGM 6;2 !


=tD  


<dCr   ru   c                      \ rS rSr% SrSr\R                  R                  SS5      S:H  r	\R                  R                  SS5      r
\R                  R                  S	S5      r\S
   \S'   \R                  R                  SS5      rSrSrSr\\S'   Sr\\S'   SrSr\\S'   Sr\\   \S'   0 r\\\4   \S'   \R                  R                  SS5      S:H  r\\S'   \R                  R                  SS5      S:H  r\\S'   \" \R                  R                  SS5      5      r\\S'   0 r\\\ 4   \S'   Sr!\\S'   Sr"\\S'   \R                  R                  SS5      S:H  r#\\S '   Sr$\\S!'   Sr%\\S"'   \&" 5       (       + r'\\S#'   Sr(\\   \S$'   Sr)\\   \S%'   Sr*\\   \S&'   0 r+\\,RZ                  R\                  \/\   4   \S''   Sr0\\/\      \S('   Sr1\\S)'   \R                  R                  S*S5      S:H  r2S+r3g),aot_inductori  z1
Settings for Ahead-Of-Time Inductor Compilation
r   AOT_INDUCTOR_DEBUG_COMPILEr   r   &AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlymetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr)   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_sopackage_constants_on_diskprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libscompile_standaloneAOT_INDUCTOR_ENABLE_LTOr   )4r   r   r   r   rR  output_pathr   r   r   debug_compilecompile_wrapper_opt_levelr  r   r   filtered_kernel_namesserialized_in_specserialized_out_specr  r   r  use_consts_asm_buildr  r  r   r  dictrX  r  r  r   r  r  r   r  r  r  r  r  r	   r  r  r  r  r  r,   _ops
OpOverloadlistr  r  
enable_ltor   r   r   r   r  r    sb    KJJNN#?ELM !#

0$! EGJJNN7E$g.@&A 
 JJNN0$   */ $.  %$  GT'+htn+  "Hd38n! 	

H#NRUU ( 
  "zz~~.BCHCOO 2::>>*CQGHKH !GT#s(^  $)D( ,1"D0 	

BCHCO !$ 
 %)T( ',t+ $-;. +/$.
 .2HTN1 59"HSM8 EG4

 5 5tCy @AF*.NHT#Y'.$$  93?3FJr   r  c            
       @   \ rS rSr% SrSr\\   \S'   Sr	\\   \S'   Sr
\S   \S'   S	rS	rS	rS	r\R"                  R%                  \R&                  R)                  S
\R"                  R+                  \R"                  R-                  \R0                  5      S5      5      5      rSr\\   \S'   / SQr\\   \S'   \R&                  R)                  SS5      S:H  rS	rSr \\   \S'   Sr!\\S'   \R&                  R)                  SS5      S:H  r"\#\S'   \R&                  R)                  S5      r$\\   \S'   \R&                  R)                  S5      r%\\   \S'   \R&                  R)                  SS5      r&\\S'   \R&                  R)                  S5      r'\\   \S'   \R&                  R)                  SS5      S:H  r(\#\S '   \R&                  R)                  S!S5      S:H  r)\#\S"'   \R&                  R)                  S#S$5      r*\\S%'   S&r+\#\S''   S	r,\#\S('   S	r-\#\S)'   S&r.\#\S*'   S+r/g),cudai4  z9Settings for cuda backend, today this consists of cutlassNarchr0   -O1)-O0r  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configs)rn   r)   r   r   %cutlass_max_profiling_swizzle_optionsCUTLASS_EPILOGUE_FUSIONr   r   cuda_cxxrn   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_levelTORCHINDUCTOR_CUTLASS_PRESETScutlass_presets+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSr   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegenr   )0r   r   r   r   rR  r  r   rX  r   r0   r  r   enable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   pathrealpathr   r   joindirnamer,   __file__cutlass_dirr  r   r  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   r  r  4  sY   C
 D(3-
 "GXc]! EJw@AI O   M ''""


'GGLL8:ST	
K 48!8C=7 8D)49C 	

0#6#= $
  #Hhsm" *+!3*
 	

H#NRUU $  13

)1  02zz~~(0x}  (*zz~~3S(  &(ZZ^^4S%TOXc]T 	

DcJcQ "4  	

;SASH $   "zz~~+U  
 %)T( +0!4/ -2#T1 $(D'r   r  c                      \ rS rSr% / r\\   \S'   / SQr\\	S      \S'   Sr
\	S   \S'   SrSrS	rS	rSrS
r\\   \S'   \R&                  R)                  S5      r\R&                  R)                  SS5      S:H  r\\S'   S
r\\   \S'   S
r\\   \S'   S
r\\   \S'   Sr\\S'   S
r\\\      \S'   Sr\\S'   Sr\\S'   Sr g
)rocmi  r  )gfx90agfx942gfx950ck_supported_archr  )	r  r  r  r  z-Osz-Ozz-Ominz-Ofastz-Omaxr  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepr  split_k_thresholdcontiguous_thresholdr   )!r   r   r   r   r  r  rX  r   r  r   r  is_debug
save_tempsr   flush_denormalsprint_kernel_resource_usager  r   r   r   r   ck_dirr  r   r  r   r  r  r  r  r  r  r   r   r   r   r
  r
    s%    D$s)FtG$@AB  	 wL 
 H J M O #(  $Ix}# ZZ^^23F 	

FLPSS $ 
 .2Xc]1 /3hsm2 48!8C=7 ',t+ )-L(49%,  s !##"r   r
  )r2  ru   halidecpu_backend)ru   r  cuda_backendc                   R    \ rS rSr% SrSrSr\S   \S'   Sr	\S   \S'   S	r
S	rS	rS
rg)r  i  hostz	host-cudaAnderson2021)r#  Li2018	Adams2019Mullapudi2016scheduler_cudar%  scheduler_cpuFr   N)r   r   r   r   
cpu_target
gpu_targetr'  r   r   r(  assertsdebugscan_kernelsr   r   r   r   r  r    sQ    J J
 	 GRS  	 7QR 
 G E Lr   r  c            	          \ rS rSr% \R
                  R                  SS5      S:H  r\R
                  R                  SS5      S:H  rSr	\
\   \S'   SrSrS	rS	rS	rS	rS	r\R
                  R                  S
S5      S:H  r\R
                  R                  SS5      S:H  r\R
                  R                  SS5      r\R
                  R                  SS5      rSrSr\
\\/S4      \S'   \R
                  R                  SS5      S:H  r\" \R
                  R                  S\R
                  R                  SS5      5      5      r\\S'   Srg)tracei'  TORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_levelr   )r   r   r   r   r   r   r   enabledsave_real_tensorsr2  r   rX  r   	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler7  r   log_autotuning_resultsr   r:  r   r   r   r   r/  r/  '  s\   jjnn2C8C?G 

'FLPSS  $Ix}# I H H   M N K JJNN#=sCsJM (>DK jjnn%CTJO !jjnn-OQUV O 37J3%+./6ZZ^^,BCHCO &)


!2::>>2G#M	
&s r   r/  )
ztrace.upload_tarrf   rg   rh   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierrc   rd   r   ri   _save_config_ignore)r/  zcuda.cutlass_dirr   r  rd   rc   rf   rg   r   ri   r"  r>   r@   rD   rE   _cache_config_ignore_prefixexternal_matmulc                       \ rS rSr% Sr\\S'   Sr\\	   \S'   Sr
SrSr\\   \S'   Sr\\   \S'   SrSr\\S      \S	'   SrS
rg)test_configsi  F%force_extern_kernel_in_multi_templateNmax_mm_configsautotune_choice_name_regexautotune_choice_desc_regex)assertr  track_memory_lifecycler   )r   r   r   r   rO  r   r   rP  r   r   runtime_triton_dtype_assertstatic_cpp_dtype_assertrQ  rX  rR  *graphsafe_rng_func_ignores_fallback_randomrT  r   use_libtorchr   r   r   r   rN  rN    si    27)47$(NHSM("'# 15404416.AEHW_%=>E Lr   rN  )*(   r   r   typingr   r   r   r   r   r   r,   !torch._inductor.custom_graph_passtorch._environmentr	   torch.utils._config_moduler
   r   r   r   r   inplace_paddingcan_inplace_pad_graph_inputr   r   r   r!   r$   r'   r1   r6   enable_auto_functionalized_v2r,  disable_progressverbose_progressworker_log_pathr9   r   r   r>   r?   r@   rA   rC   rD   rE   rF   rH   rI   rJ   r  rX  rK   rN   rP   rR   rT   rU   online_softmaxdcestatic_weight_shapessize_assertsnan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr   bfloat16_atomic_adds_enabledrb   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passrc   r   custom_graph_passCustomGraphPassTyperd   re   CustomPartitionerFnTyperf   rg   rh   fxgraphGraphri   r  rj   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrk   rl   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmrs   rw    reorder_for_compute_comm_overlapr{   r|   reorder_for_peak_memoryr}   getenvenv_strr   r   r   r   r   r   estimate_op_runtimer   intra_node_bwinter_node_bwr   max_autotunemax_autotune_pointwisemax_autotune_gemmr   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memr   r   r   r   uppermax_autotune_gemm_backendsmax_autotune_conv_backendsr   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr0   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr   fallback_randomimplicit_fallbacks assume_unaligned_fallback_outputaggressive_fusionr   r   enabled_metric_tablesr   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_size)max_fusion_buffer_group_pairwise_attemptsmax_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapesjoint_graph_constant_foldingdebug_index_assertsemulate_precision_casts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r   r   r   r   r   _fuse_ddp_communication_fuse_ddp_bucket_sizer   r   r   r   r  r  r	  r
  r  r  libfb.pyr  __package__get_dir_pathr  r  replacesepr  
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexr  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr  r  r  r  r   r!  r"  enable_linear_binary_foldingr%  r&  r'  r.  r0  r2  ru   r  r  r
  r  r   r  r/  rJ  rK  rL  TensorrN  torch.utils._config_typingmodulesr   r   r   r   <module>r     s   	 
 I I  ( ( V V **..!@#F#M# Cx~ CHTN Cx~ CKx~ K8D> d 
 
 JJNN7=D 
 	     8A{{3 
 '.  - ?1  $) D ( )F(G x~ G ./ "8D>  )/MB)   " d ! )F(G x~ G 1V0W x~ W $*VW d W 05 & 4 57 !4S> 6 )-  ,  (1*  JJNN#>DKT K JJNN=sCsJ D  ::>>"<cBcI
D I
 *3_ d 4 >DK    zz~~:C@CGjjnn89S@ >DK JJNN4Y[[ccR
       **..!@#F#M 

<=D  $  HJzz~~HWCD 
    *+     TX 5??<<PP WTX EOO==QQ X TX u88PP W PT u88LL SPT 99MM T
 JN hx)=)=(>(DEF M 	 	;	<=:;	= 	 	 (	;	<=:;	= 	   $)     ( 68 c4S>12 7 79 $sDcN23 8   zz~~&JCPTWW  #   	+ c3h  FQAB P $)   (	?@A>?A	
	*  )-  ,  16 ($ 6 99@AAJ 		W )(3-  		"<==WFDCPWL )(3-  >D w9: CQU .3%*9M0N U4: 7=1 : 	 3HXseSj=Q4R    */ !4 /   &,?<& d  zz~~:;sB (NOSVV  JJNN#DEL  13  2 JJNNDcJcQ " JJNNQSVW
 / JJNN2y{{CPST
   *, T#Y + $47 d  !# DI "  ZZ^^.0A%'   ZZ^^.%'  DF::>>2ID%' (? @ 
 DF::>>2ID%' (? @ 

 "      JJNN45<	 jjnn%HISP 
 /3 +03 -14 . 

'LMQTT  JJNN<=D  JJNNJKsR ( $'JJNN<cB$   

'LbQ JJNN#DjQ  JJNN<cBcI 
BC BD BK K KGC GD G *I 
 !& 1 1 1Ss JJNN68JKsR  JJNN+KSQUXX  ZZ^^$FLPSS  **..!@ASH
       !(3- 
   JJNNCDK !
   ZZ^^$@ASHd H(HISP $ P

'LbQ JJNN=sCsJ D  !#  JJNN<cBcI 
 $%    -/ )            BJJNN#@!DE::>>"BCHCO  "&   %           "# &* #  $    JJNN:C@CG 
  1 11OUe>O>O5O [8$8  JJNNEsKsR #
 &* Xc] )(,  ,
C 
 67 S 7 &. s - !'7:!    T       I U8I+>+C%D E 
 ! D  : :
6T 6! !J *3:P:R# R $*:=$ D  ">!? $ ? 39IL3 )4  JJNN>DK T 
 3- ;;
 $&33[00bff=zJ   '33J?
    

<cBcI JJNN8#>#E        (- $  (          >DK # 
 $    ! zz~~5r: B&  , 3"  +-**.."D+ (3- 
 JJNNHISP 0    8#>#E$ E %* T )  %  $
 $ t # 8= . < ,1 "D 0 49 *D 8 JJNN?EL  **..)JCPTWW 4 W 37 )4 6 46 tCc3h/0 5	Xc] 	 %+1% c @ @Fh hVGG GGTD( D(NF# F#T 38W./ 7 -5g() 4 8K K\" T#Y * T#Y 2 UWhellELLI4OPQ V , , ckk(+ ,K! $   s   'Az4  z4 4
{{