
    sg&                    
   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlZd dlmZ d d	lmZ d d
lmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z. d dl/m0Z0m1Z1 d dl2m3Z3 erd dl4m5Z5 da6da7de8d<   ejr                  ju                  e;d      Z<d Z=d Z>d%dZ?d%dZ@ej                  dk(  ZB ej                  e;      ZD eE       ZFde8d<   d%dZGd ZH	  ej                  eH        G d d       ZKej                  j                  d!d"      d#k(  s&ej                  j                  d$d#      d#k7  s e3       syeKj                          y# eJ$ r Y hw xY w)&    )annotationsN)FutureProcessPoolExecutorThreadPoolExecutor)BrokenProcessPool)partial)time)AnyCallableDictListOptionalSetTYPE_CHECKING) get_registered_device_interfaces)config)	CodeCacheFutureCppCodeCacheCppPythonBindingsCodeCacheCUDACodeCacheHalideCodeCacheLambdaFutureROCmCodeCacheTritonCodeCacheTritonFuture)_warm_process_poolAnyPoolSubprocPool)_async_compile_initializer)_set_triton_ptxas_path_worker_compile_triton)	_Faketqdmtqdm)has_triton_package)
HalideMetag        zOptional[float]_t0kernel_codec                 R    t                	 ddlm}   |         y# t        $ r Y yw xY w)zG
    Setup that must be done prior to forking with a process pool.
    r   
triton_keyN)caching_device_propertiestriton.compiler.compilerr*   ImportErrorr)   s    P/var/www/html/venv/lib/python3.12/site-packages/torch/_inductor/async_compile.pypre_fork_setupr/   5   s)     7 s    	&&c                 ~    t               D ]0  \  } }|j                         s|j                  j                          2 y N)r   is_availableWorkerget_device_properties)_device_interfaces     r.   r+   r+   H   s8    ?A <((*##99;<    c                 &    t         t               a y y r1   )r&   r	    r7   r.   _compile_startr:   N   s    
{f r7   c                 J    t         t               } t        | t         z
  z  ad a y y r1   )r&   r	   _cumulative_compile_time)t1s    r.   _compile_endr>   T   s'    
V BH,  r7   win32zSet[AnyPool]	_pool_setc                 N    t         D ]  } | j                           t                y)z/Shut down all outstanding compile-worker pools.N)r@   shutdown
after_fork)pools    r.   shutdown_compile_workersrE   f   s     Lr7   c                 h    t         j                          t        j                  j	                          y)z7Reset pools to initial state without shutting them downN)r@   clearAsyncCompileprocess_poolcache_clearr9   r7   r.   rC   rC   m   s    OO))+r7   )after_in_childc                     e Zd ZddZe ej                  d      dd              Zed        Ze ej                  d      dd              Z	e
dd       Ze
dd       Zd Zddd	Zdd
ZddZddZd Zd ZddZddZy)rH   c                     y r1   r9   selfs    r.   __init__zAsyncCompile.__init__z   s    r7      c                 ^    t         j                  dkD  sJ t        t         j                        S NrQ   )r   compile_threadsr   r9   r7   r.   rD   zAsyncCompile.pool}   s)     %%)))!&"8"899r7   c                      y)z>No-op function to help mark when the subprocess pool is ready.readyr9   r9   r7   r.   
_get_readyzAsyncCompile._get_ready   s     r7   c            	     B   t         j                  dkD  sJ t         j                  dk(  rt        t         j                        } nt	                t        j                  t         j                        }t        t         j                  |t        t        t        j                                     } t
        j                  j                  d | j                  t        j                          | j#                  t$        j&                        | _        t*        j-                  |        | S )NrQ   
subprocess)
mp_contextinitializer)exitpriority)r   rT   worker_start_methodr   r/   multiprocessingget_contextr   r   r   osgetpidutilFinalizerB   sysmaxsizesubmitrH   rW   ready_futurer@   add)rD   ctxs     r.   rI   zAsyncCompile.process_pool   s     %%)))%%5v556D!--f.H.HIC&&&#$>		LD   ))$CKK)X !KK(?(?@dr7   c                    t         j                  dk  ry t                t        | j	                         t         j                         t                y rS   )r   rT   r:   r   rI   r>   )clss    r.   	warm_poolzAsyncCompile.warm_pool   s7    !!Q&3++-v/E/EFr7   c                t    t         j                  dk  r |       S | j                         j                  |      S rS   )r   rT   rD   rf   )rk   tasks     r.   rf   zAsyncCompile.submit   s.    !!Q&6Mxxz  &&r7   c                |    t         j                  dkD  xr( | j                         j                  j	                         S rS   )r   rT   rI   rg   donerN   s    r.   _use_process_poolzAsyncCompile._use_process_pool   s5    ""Q& 8!!#00557	
r7   c                   t         j                  d|       t                t                t	        j
                  ||      }| j                         rqddg}|D ci c])  }|t        j                  v s|t        j                  |   + }}t        || j                         j                  t        |j                  |            S |j                          |S c c}w )NzTriton Kernel:
%sTORCHINDUCTOR_CACHE_DIRTRITON_CACHE_DIR)kernel_code_loginfor:   r    r   loadrq   r`   environr   rI   rf   r!   _reload_in_subproc
precompile)rO   kernel_namesource_code
device_strkernelenv_varsv	extra_envs           r.   tritonzAsyncCompile.triton   s    1;?  %%k;?!!# 23EFH3;OaqBJJBJJqM)OIO!!#***--  M Ps   C1Cc                    ddl m}  ||i |S )Nr   )MultiKernelCall)$torch._inductor.codegen.multi_kernelr   )rO   argskwargsr   s       r.   multi_kernelzAsyncCompile.multi_kernel   s    H ///r7   c                    t         j                  d|       t        j                  dk  rt	        j
                  |      j                  S t	        j                  || j                        t        fd      S )NzCPP Kernel:
%srQ   	submit_fnc                 &             j                   S r1   )r~   )
get_results   r.   <lambda>z"AsyncCompile.cpp.<locals>.<lambda>   s    
(;(; r7   )
ru   rv   r   rT   r   rw   r~   
load_asyncrf   r   )rO   r|   r   s     @r.   cppzAsyncCompile.cpp   s[    .<!!Q&$$[1888%00TJ ;<<r7   c                    t         j                  d|       t        j                  dk  rt	        j
                  ||      S t	        j                  ||| j                        }t        |      S )NzCPP+Bindings Kernel:
%srQ   r   )	ru   rv   r   rT   r   load_pybindingload_pybinding_asyncrf   r   )rO   argtypesr|   r   s       r.   cpp_pybindingzAsyncCompile.cpp_pybinding   s\    7E!!Q&-<<X{SS3HH+J  
++r7   c                `    t         j                  d       fd}| j                  |      S )NzCUDA Kernel:
%sc                 6    t        j                         d   S Nr   )r   rw   dst_file_extr|   s   r.   rn   zAsyncCompile.cuda.<locals>.task        %%k<@CCr7   ru   rv   rf   rO   r|   r   rn   s    `` r.   cudazAsyncCompile.cuda   +    /=	D {{4  r7   c                `    t         j                  d       fd}| j                  |      S )NzROCm Kernel:
%sc                 6    t        j                         d   S r   )r   rw   r   s   r.   rn   zAsyncCompile.rocm.<locals>.task   r   r7   r   r   s    `` r.   rocmzAsyncCompile.rocm   r   r7   c                    t         j                  d||       t        j                  dk  rt	        j
                  ||      S t	        j                  ||| j                        }t        |      S )NzHalide Kernel:
%r
%srQ   r   )	ru   rv   r   rT   r   generate_halidegenerate_halide_asyncrf   r   )rO   metar|   r   s       r.   halidezAsyncCompile.halide   s^    5t[I!!Q&"224EE(>>kT[[J  
++r7   c                L   t        |j                         D cg c]  \  }}t        |t        t        f      r| c}}      }t        |dt        j                  d      }t        j                  dkD  r|j                         D ]r  \  }}t        j                  r!t        |t              s|j                  |       t        |t        t        f      sN	 |j                         ||<   |j                  d       t t!                y c c}}w # t        $ r}t        d      |d }~ww xY w)NzInductor Compilationr   )totaldescdisabledelayrQ   zA compilation subprocess exited unexpectedly. This is likely due to a crash. To facilitate debugging, you can re-run with TORCHINDUCTOR_COMPILE_THREADS=1 to cause compilation to occur in the main process.)lenitems
isinstancer   r   r#   r   disable_progressrT   verbose_progressr"   set_postfix_strresultr   RuntimeErrorupdater>   )rO   scopekeyvaluenum_kernelspbarr   es           r.   waitzAsyncCompile.wait   s
    #(++-Cefo%>? 
 '++	
 !!A%${{} #V**:dI3N((-fv&?@!%+]]_c
 KKN# 	9& - !*Q
  !!!s   "D
D			D#DD#NreturnNone)r   r   )r   r   )rn   zCallable[..., Any]r   r
   )r   )r{   strr|   r   r}   r   )r   r
   )r|   r   )r   z	List[str]r|   r   )r   r%   r|   r   )r   zDict[str, Any]r   r   )__name__
__module____qualname__rP   staticmethod	functools	lru_cacherD   rW   rI   classmethodrl   rf   rq   r   r   r   r   r   r   r   r   r9   r7   r.   rH   rH   y   s     Y:  :   Y  2   ' '

.0=,!!,r7   rH   TORCH_TNT_IN_USE01TORCH_WARM_POOLr   )O
__future__r   r   loggingr^   r`   rd   concurrent.futuresr   r   r   concurrent.futures.processr   r   r	   typingr
   r   r   r   r   r   r   torchtorch._dynamo.device_interfacer   torch._inductorr   torch._inductor.codecacher   r   r   r   r   r   r   r   r   +torch._inductor.compile_worker.subproc_poolr   r   r   'torch._inductor.compile_worker.watchdogr   %torch._inductor.runtime.compile_tasksr    r!   	torch.hubr"   r#   torch.utils._tritonr$   torch._inductor.runtime.hintsr%   r<   r&   __annotations___logginggetArtifactLoggerr   ru   r/   r+   r:   r>   platform_IS_WINDOWS	getLoggerlogsetr@   rE   rC   register_at_forkAttributeErrorrH   rx   getrl   r9   r7   r.   <module>r      s`   "    	 
 N N 8   J J J  K "
 
 
 
 O & 2 8  _ ..228]K&< llg%g! %	< ,	Bz2
e eR JJNN%s+s2	zz~~'-4i  		s   E: :FF