
    sge                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZmZmZmZmZmZmZmZ d dlmZ d dlZd dlZd dlmc mZ d dlmZ d dlmZm Z!mZ"mZ# d dl$m%Z% d dl&m'Z' d d	l(m)Z)m*Z*m+Z+m,Z, d d
l-m Z. d dl/m0Z0m1Z1 d dl2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8m9Z9m:Z:m;Z;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZBmCZCmDZDmEZEmFZFmGZGmHZH d dlImJZJ d dlKmLZL d dlMmNZNmOZO d dlPmQZQ d dlRmSZS ddlTmUZU ddlVmWZW ddlXmYZY ddlZm Z m[Z[ ddl\m]Z] ddl^m_Z_ ddl`maZa ddlbmcZcmdZd dd lemfZf dd!lgmhZh dd"limjZj dd#lmkZkmlZlmmZmmnZnmoZompZpmqZqmrZrmsZs dd$ltmuZu  e j                         r	d d%lwmxZxmyZy nd&ezfd'Zy ej                  e|      Z}ej                  j                  e|d(      Zej                  j                  e|d)      Zej                  j                  e|d*      Zd+ Zd,ej                  d-ee   d.ej                  fd/Zd,ej                  d.efd0Zd1 Z ej                  d      d2        Z ej                  d      d3        Zd4 Zd5 Zd6 Zd7 Zdzd8efd9Z	 	 d{d:ej$                  j&                  d;eeezef      d<eeej$                  j(                  gef      d.eej$                  j&                  eezef   f   fd=Zd:ej$                  j&                  fd>Zd?eej                     fd@Z	 dzd:ej$                  j&                  d?eej                     dAefdBZdC Zd|d.eezef   fdDZe j6                  dE        ZdF Z eydGH      	 	 	 	 	 	 	 	 	 	 	 d}d:ej$                  j&                  d?eej                     dIeeB   dJeee      dKedLee   dMedNed8edOee8   dPeeezdf      dQee   dReeeej   gef      d.ee5ezf   fdS       Z	 	 	 	 	 	 	 	 	 	 d~d:ej$                  j&                  d?eej                     dIeeB   dJeee      dKedLee   dMedNed8edPeeezdf      dQee   dReeeej   gef      d.ee5ezf   fdTZdUeeE   dJee   d.ee   fdVZ	 ddWdWdWdXdYedZef   dJee   d[ed\eeez      dKed8ed]eej                  dZf   d^ee<   d_eedZf   d.edZef   fd`Zdaej                  d.ej                  fdbZdcej                  ddej                  d-ee   fdeZ	 ddYedZef   dUeej                     dJee   fdfZedfdgej$                  j&                  dheej                     diedZef   djeeezef      fdkZ e	d       Zdlej$                  j&                  dmeej                     dnej$                  j&                  doediedZef   dIeBdLedpe8fdqZeddfdgej$                  j&                  dheej                     diedZef   djeeezef      dreeeLedZef   f      f
dsZd:ej$                  j&                  fdtZd:ej$                  j&                  dUeej                     duedZef   fdvZd:ej$                  j&                  dUeej                     duedZef   fdwZdxehd.dfdyZy)    N)count)AnyCallableDictListOptionalSequenceTupleUnion)mock)#min_cut_rematerialization_partition)compiled_autogradconfigloggingutils)get_interface_for_device)wrap_compiler_debug)countersdetect_fake_modeflatten_graph_inputslazy_format_graph_code)r   )aot_export_modulemake_boxed_func)_StrideExprStr	code_hashCompiledFxGraphFxGraphCache)BoxedDeviceIndexCudagraphCachedInfoget_placeholder_info#log_cudagraph_skip_and_bump_counterPlaceholderInfo)save_args_for_compile_fx_inner)	cache_dir)	BoxedBoolcount_tangentsfresh_inductor_cache	InputTypeis_gpushould_assume_input_alignedtensor_is_aligned)trace_structured)
OpOverload)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter   )aot_autograd)_use_lazy_graph_module)_PyTreeCodeGen   )r   metrics)DebugContext)select_decomp_table)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)ExternKernelNode)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_namehas_incompatible_cudagraph_ops#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)log_optimus_to_scubatime_and_logattrc                 "    t         j                  S N)dynamo_utilsidentityrL   s    M/var/www/html/venv/lib/python3.12/site-packages/torch/_inductor/compile_fx.pyrK   rK   _   s    $$$    
perf_hintspost_grad_graphscudagraph_static_inputsc                     t        | t        j                        sy t        | j                        D cg c].  }| j                  |      dk(  s| j                  |      dk7  s-|0 c}S c c}w Nr   r6   )
isinstancetorchTensorrangendimstridesize)tis     rR   get_expanded_dimsrb   o   sL    a&QVV}L!q(8QVVAY!^ALLLs   A)A)"A)r`   expanded_dimsreturnc                 l    |D ].  }t         j                  j                  j                  | |dd      } 0 | S rX   )rZ   opsatenslice)r`   rc   expanded_dims      rR   index_expanded_dimsrj   u   s4    % 8IINN  L!Q78HrS   c                    t        | t        |             j                         } t        j                  |       dk7  r| j                         }| j                  }t        t        t        |                  }t        t        ||            D cg c]  \  }}|	 }}}t        t        |            D ]6  }|dk(  rdn
|||dz
        }|dk(  rdn
|||dz
        }|||      ||z  k  s6 y yc c}}w )Nr   r6   TF)rj   rb   squeezerZ   _debug_has_internal_overlapr^   shapelistr\   lensortedzip)	r`   stridessizesindices_xra   prev_stride	prev_sizes	            rR   complex_memory_overlaprz   {   s    
 	A034<<>A((+q0((*uS\*+!'GW(=!>?A1??s7|$ 	A Av!771q5>+BK!Vwq1u~)>Iwqz"[9%<<		
  @s   C*c                     t         j                  j                  j                         }t	        t        |             }|r|j                  s|S ||j                  j                  z   S rN   )rZ   _guardsTracingContexttry_getro   r\   fw_metadatastatic_input_indices)	num_fixedcontextfixeds      rR   get_static_input_idxsr      sR    
 mm**224Gy!"E'--7&&;;;;rS   c                  4    t        j                  t              S rN   )dynamo_loggingget_step_loggerlog rS   rR   _step_loggerr      s    ))#..rS   c                     t         j                  j                         rgt         j                  j                  j                  j
                  s8t         j                  j                         dk\  rt        j                  d       y y y y )N)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	rZ   cudais_availablebackendsmatmul
allow_tf32get_device_capabilitywarningswarnr   rS   rR   _warn_tf32_disabledr      sc     	

!##**55JJ,,.&8d	
 9 6 	"rS   c           	         ddl m}m} i }| j                  d      D ]   \  }}|||<    |||||j                         " | j                  d      D ]   \  }}|||<    |||||j                         " |j                  j                  d      }	g }
|	D ]  }|j                  }||j                  v r!|j                  |   }|
j                  |       >||j                  v rE|j                  |   }|
j                  |       t        ||         |j                  t        |      <   ||j                   v sJ |
j                  d         ddlm} t'        |j                  j(                        d	   j*                  d   }g }|j,                  }|j.                  }|j0                  }t3        |      D ]y  \  }}d }|t5        |      t5        |      z   t5        |      z   k  r;|j                  |v r||j                     }n|j                  |v r||j                     }|j                  |       {  |||
|t7        j8                         d |i       }|S )
Nr   )_assign_attr	_AttrKindF)remove_duplicate)	attr_kindplaceholder)op)_unlift)torch.export.unflattenr   r   named_parameters	PARAMETERnamed_buffersBUFFERgraph
find_nodesnameinputs_to_parametersappendinputs_to_buffersrA   metarC   user_inputstorch.export._unliftr   ro   nodesargsbuffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumeraterp   pytreeLeafSpec)modgmgraph_signaturer   r   
state_dictr   parambufferplaceholder_nodeslifted_inputsnode	node_nameparameter_namebuffer_namer   outputsmutated_outputsbuffer_mutationsuser_input_mutationsr   idxoutvalueunlifted_gms                            rR   _unlift_graphr      ss   >J++U+C 
e 
4))		

 ))5)A 
f!
4&&		

 ++}+=M " 'II	<<<,AA)LN  0/;;;);;IFK  - 'z+'>? GG0=  ; ;;;;  &' -288>>"2&++A.GO&88*@@#11Mg& 	&S%&-A)BBSEWWWxx++(211,SXX6u%	& 

K rS   c           	   #     K   t        t        j                  | j                  j	                  dt
        j                  j                  j                        | j                  j	                  dt
        j                  j                  j                                    D ]  }|j                  t
        j                  j                  j                  k(  r;|j                  d   j                  }|j                  d   j                  }| | o|j                  t
        j                  j                  j                  k(  s|j                  d   j                  }|j                  d   j                  }| |  y w)Ncall_functionr   targetr6   r2   r   )rq   	itertoolschainr   r   rZ   rf   higher_ordercond
while_loopr   r   r   )r   r   true_subgraph_namefalse_subgraph_namecond_subgraph_namebody_subgraph_names         rR   _get_subgraph_namesr      s    HH?599;Q;Q;V;VWHH"599+A+A+L+L   	
 % ;;%))00555!%1!2!2"&))A,"3"3$$%%[[EII22===!%1!2!2!%1!2!2$$$$#%s   D:E:==E:c                     t        |       D ](  }t        | |      }t        |d       }t        | ||       * t	        | |      S )N)example_inputs)r   getattr_recursive_pre_grad_passessetattrr=   )r   r   subgraph_namesubgraphnew_subgraphs        rR   r   r     sI    ,R0 12}-1(4PM<0	1
 2~..rS   c                 h    t        |       D ]  }t        | |      }t        |        t        |        y rN   )r   r   _recursive_joint_graph_passesr:   )r   r   r   s      rR   r   r     s4    ,R0 02}-%h/0 rrS   is_inferencec                 l    t        |       D ]  }t        | |      }t        ||        t        | |       y rN   )r   r   _recursive_post_grad_passesr;   )r   r   r   r   s       rR   r   r     s8    ,R0 <2}-#Hl;< R&rS   r   lifted_constantsskip_folding_node_fnc                 L   ddl m}m}m}m}m}  || ||      \  }}	t        t        |j                  j                        d   j                  d         D 
ci c]  \  }
}|j                  |
 }}
}g }g }i }| j                  j                  D ]V  }|j                  |v r|j                  |       #|j                  |   |k(  s6|j                  dk7  sF|j                  |       X |D ]>  }d|j                  z   } || ||	||j                        |       ||j                     ||<   @ |ddd   D ]X  }|j                  r/|j                  D ]  }|j                  |   |k(  rJ d| d        >| j                  j!                  |       Z | j#                          ||fS c c}}
w )	a  
    This function takes an GraphModule input "gm".
    The gm will be split into 2 components,
      1) const_gm, which consists the subgraph of gm that can be constant folded.
      2) gm (being inplace modified,) which returns the graph after constant folding.

    If an additional "lifted_constants" argument is passed in, we will assume the gm has
    been lifted and run the transformation accordingly.

    When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
    the nodes for which the callback returns True.

    const_output_index is a mapping of corresponding node name from gm to the
    output index of const_gm.
    Returns (const_gm, const_output_index)
    r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphr   r   _FOLDED_CONST_Nznode: z user not empty.) torch._inductor.constant_foldingr   r   r   r   r   r   tupler   r   r   r   r   r   r   users
erase_node	recompile)r   r   r   r   r   r   r   r   const_gmconst_resultr   rw   const_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                      rR   split_const_gmr   #  s   *  8
2Hl
 #,E(..2F2F,G,K,P,PQR,S"TQM  MO '99%""4(YYx $44M9Q  &	'   F)DII5"tyy12		
 .;499-E>*F dd# &::ZZ Wvvh':5VvEU7VV5W HH%& LLN'''=s   F c                 H   t         j                  j                  }|j                  j                  |j
                  j                  |j                  j                  |j                  j                  h}|D ]  }| j                  j                  d|      D ]  }t        |j                  j                  dd       t         j                        s8|j                  d   j                  t         j                  k(  sc|j                  d   j                   j"                  dk(  s  y  y)Nr   r   valr   TF)rZ   rf   rg   mmdefaultaddmmbmmbaddbmmr   r   rY   r   getr[   dtypefloat32devicetype)r   rg   tf32_opsr   r   s        rR   is_tf32_warning_applicabler	  e  s    99>>D

	H  HH''?6'J 	D499==5u||DIIe$**emm;IIe$++00F:	 rS   r   c                     t        d | D              }t        j                  r=t        j                  r-|s+t        j                  d       t        j                  d      S t        j                         S )z
    For CPU backend, enable comprehensive padding causes some unit tests
    fail due to changing number of generated kernels. Skip for now.
    c              3      K   | ]>  }t        |t        j                        st        |j                  j
                         @ y wrN   )rY   rZ   r[   r)   r  r  .0r`   s     rR   	<genexpr>z6maybe_disable_comprehensive_padding.<locals>.<genexpr>}  s/      "#Au||9Tqxx}}s
   A$Az!Skip comprehensive padding on CPUF)comprehensive_padding)	anyr   disable_padding_cpur  perf_hint_loginfopatch
contextlibnullcontext)r   has_gpus     rR   #maybe_disable_comprehensive_paddingr  x  s]    
  '5 G !!f&B&B7>?||%88%%''rS   force_allow_non_fake_inputsc                 j   t        |      }|s<t        j                  j                  d      } t	        | |      j
                  |  |S |st        j                         n t        j                  j                  |dd      }|5   t	        | |      j                  |  ddd       |S # 1 sw Y   |S xY w)z}
    If we can not detect fake mode from the context of inputs, create one.

    The created fake mode will be returned.
    Tallow_non_fake_inputs)moder  N)r   rZ   _subclassesFakeTensorModer0   	propagater  r  r   r  objectpropagate_dont_convert_inputs)r   r   r  	fake_modectxs        rR   fake_tensor_propr%    s     !0I%%4444P	4r	*44nE  / ""$""9.EtL 	
  	LN2I.LL	
 	
 s   B((B2c                  V   t         j                  t         j                  S t        j                         syt        j                  j                         ry	 ddlm}  d}t        j                  j                  d}| t        j                  j                  |      k\  S # t        $ r Y yw xY w)NFr   )REMOTE_CACHE_VERSIONz.pytorch/remote_cache:fx_graph_memcache_versionz2pytorch/remote_cache:fx_graph_memcache_version_amd)r   fx_graph_remote_cache	is_fbcoderZ   _utils_internalis_fb_unit_testtorch._inductor.fb.remote_cacher'  ModuleNotFoundErrorversionhipjustknobs_getval_int)r'  jk_names     rR    should_use_remote_fx_graph_cacher2    s    ##/+++,,.H ?G}}$F5#8#8#M#Mg#VVV  s   B 	B('B(c                     t        j                  |       5  t        j                         cd d d        S # 1 sw Y   y xY wrN   )r   r  get_config_copy)config_patchess    rR   get_patched_config_dictr6    s/    	n	% (%%'( ( (s   4=c               #      K   t         j                  r#t        t               d      5  d  d d d        y d  y # 1 sw Y   y xY ww)NF)dirdelete)r   force_disable_cachesr'   r$   r   rS   rR   with_fresh_cache_if_configr;    s>     "" "ik%@ 		 	 		 	s   &A;AA Ac                     t        j                         5 }|j                  t        j                  j
                  j                                |j                  t        t        j                               |j                  t        j                  ddd             |j                  t                      |j                  t                       t        t        d      | i |cd d d        S # 1 sw Y   y xY w)Ncompile_fx_innerinductor_compileF)
phase_namefwd_onlyinductor)compiler_name)r  	ExitStackenter_contextrZ   r   _python_dispatch_disable_current_modesr4   dynamo_configuse_lazy_graph_modulerO   dynamo_timedr;  r8   r   _compile_fx_inner)r   kwargsstacks      rR   r=  r=    s    
 
			 
5EKK88OOQR2=3V3VWX%%"/AE	

 	689LN+O"#4JO


 
 
s   CC..C7zcompilation time (in seconds)rQ   
cudagraphsstatic_input_idxsis_backwardgraph_idcpp_wrapperaot_modeboxed_forward_device_indexuser_visible_outputs
layout_optextern_node_serializerc                 v  	 t        j                  | j                        dk(  r.|s,ddlm} |j                  |        t        | j                        S g t        j                  d       t        t        t        t        | j                  j                                    j                  d   t         t"        f      sJ d| j                          t$        j&                  rt)        | ||||||	|
|       #t+        t$        j,                  j.                        ||||||
||d
}t1        j0                         }t3               }t5        |      }	fd}t7        d	      j9                         5 }t$        j:                  st$        j<                  s|r|st?        |      D ]F  \  }}t        |t@        jB                        s!|jD                  jF                  d
k(  s;|v s@d|_$        H tK        jL                  || |||t$        j<                  |      }n/ || |||      }|r|cddd       S tK        jN                  ||      }ddd       tP        j                  dt1        j0                         |z
          tS               tT        jV                  d|rdnd d|        d_,        |S # 1 sw Y   fxY w)z
    Inductor API that compiles a single graph.

    If you change the argument list for this function, make sure you
    also update the call to save_args_for_compile_fx_inner below accordingly.
    r   )_LazyGraphModuleNz&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got )
rM  rN  rO  rP  rQ  rR  r   rS  rT  rU  )
rM  rN  rO  rP  rQ  rR  r   rT  rU  rV  c                    t        | |fi |}t        |t              r|S d}r|j                  rRd|j                  v rt        d|j                          nt        d   dxx   dz  cc<   t        j                         n3t        d |D              }t        j                  j                  sdd	lm}  || |      }|du}	|	r
||_        nd}	|	 d
ft        |        df| dft!        d |D              dfg}
t#        |       }t%        |j&                        dk(  sJ |j&                  d   D cg c]>  }t        |t(        j*                  j,                  j.                        r|j0                  nd@ }}|
D cg c]
  \  }}|r	| }}}t3        t5        | j6                              }t9        |||      }||_        ||_        ||_        |_         |S c c}w c c}}w )z
        This function calls fx_codegen_and_compile and also adds some extra metadata to the resulting
        compiled fx graph. The metadata is saved to FXGraphCache.
        Nr   zskipping cudagraphs due to rA  cudagraph_skipsr6   c              3   f   K   | ])  }t        |t        j                        rt        |       + y wrN   )rY   rZ   r[   rz   r  s     rR   r  zA_compile_fx_inner.<locals>.codegen_and_compile.<locals>.<genexpr>L  s+      4!!U\\2 +1-4s   /1r   )3check_for_mutation_ignore_cuda_graph_managed_tensorzmutated inputszincompatible opszcomplex memory overlapc              3   p   K   | ].  }t        |t        j                  t        j                  f       0 y wrN   )rY   rZ   r[   SymIntr  s     rR   r  zA_compile_fx_inner.<locals>.codegen_and_compile.<locals>.<genexpr>l  s,       ! 'q5<<*FGs   46znon-Tensor inputs)!fx_codegen_and_compilerY   strdisabled_cudagraphs_reasondevice_typesr!   r   r%   disabler  r   triton cudagraph_support_input_mutationtorch._inductor.cudagraph_utilsr\  rD   allrF   rp   r   rZ   fxr   Nodestack_tracer   r    r   r   cudagraph_infoinputs_to_check	fx_kwargsrS  )r   r   rl  rm  compiled_graphrk  complex_memory_overlap_inputsr\  has_mutation_strhas_mutationcudagraph_testsoutputargstack_tracesbscudagraph_fail_reasonsplaceholdersrS  rM  rN  s                     rR   codegen_and_compilez._compile_fx_inner.<locals>.codegen_and_compile0  s    0NPiPnc* "!88^88875n6_6_5`a Z():;q@;!!*-03 4+4 1- }}EE
 L*- % $44#?L#DTA $(L &%'787;;=OP668PQ %3  ,	# %R6;;'1,,,  &{{1~  )338J8J(KS__QUU    9H)Q1q!)Q&)Q$%9"((%CD!4 ,0F" )7%)8&#, 4N1  *Rs   0AG:
GGz+pytorch.wait_counter.fx_codegen_and_compiler   T)localremotez%FX codegen and compilation took %.3fsztorchinductor done compiling 	BACKWARDSFORWARDS graph )-rO   count_callsr   torch.fx._lazy_graph_modulerX  force_recompiler   forwardstatic_inputs_logdebugrY   nextiterreversedr   r   r   ro   r   	save_argsr#   r%   rd  rM  timer2  get_input_idxs_to_checkr1   guardr:  fx_graph_cacher   rZ   r[   r  r  _is_inductor_staticr   loadpost_compiler   r   r   INFO_boxed_call)r   r   rM  rN  rO  rP  rQ  rR  r   rS  rT  rU  rV  rX  graph_kwargsstartr(  rl  rz  rv   ra   inputrn  s     ``     `             rR   rJ  rJ    s   , )Q.x 	A((,rzz** DFWXT(288>>*+,11!4udm \	PQSQYQYPZ[\  &!/##%'A!5!	
 v}}778
 !.""$ 4 "8L IIKE<>-n>OPOUn 
C	D	J	J	L "PQ++&&*?%n5 55uell3))V3..04E-5 *..#++,N 1NO\N  &?" "@ *66
NA"H II5tyy{U7JKLN'%;:
6 7
	 "&N]" "s&   7AJ/J/"J/'AJ/3J//J8c                 n   $% t         j                  x},dd l}t        j	                  d|        |j
                  |       t        j                         5  t               r
t                t        d   j                         }t        j                  t        t        j                         d              t!               t"        j$                  d|rdnd d|         fd	$t&        j(                  j+                  d
d $fd       t,        j.                  j1                          t3              }t5                t'        j6                         5  t9               }d d d        t-        j:                        5  t=         |       t,        j.                  j?                          t@        j/                  dtC        d ddd             t+        d fd       t        jD                         rtG        dtI        tK                     i       d d d        t-        j:                  |      5  tM              5  d }d }d }|rt         jN                  jP                  rmtS               \  }}tU        |g |||||	||d
      }t-        jV                  |      5  |sJ d       |jY                          |j[                         \  }}d d d        tU         |||||	|||||      }t]        j^                         }t-        jV                  |      5   |jX                    g }|j`                  tc               %|j`                  D ]  }te        |d      rctg        ti        |jj                  jl                              dk(  r8|jo                  tq        %fd|jj                  jl                  D                     r|jo                  d         ts        |       |ju                         }|jw                         \  }}}t\        xjx                  |z  c_<        t\        xjz                  |z  c_=        t\        xj|                  |z  c_>        |r.t         j~                  j                  rt,        j                  j                  st'        j                  j                  j                   rd } j                  j                  D ]  }|j                  j                  dd       } |j                  dk(  sCt        | t&        j                        r)t&        j                  j                  j                  |       sr|j                  j                  dd       x}s n d }!|r	|! d!| d"}!n|! d"}!|!t,        j                  _B        t,        j                  du r&|cd d d        cd d d        cd d d        cd d d        S |rSt,        j                  j                  s9dd#lMmN}"  |"t,        j                  j                        t,        j                  _B        t        |||t,        j                  j                  |j                         t        d   |z
        }#d d d        d d d        d d d        #cd d d        S # 1 sw Y   xY w# 1 sw Y   \xY w# 1 sw Y   xY w# 1 sw Y   KxY w# 1 sw Y   OxY w# 1 sw Y   SxY w# 1 sw Y   y xY w)$Nr   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setrA  i  ztorchinductor compiling r}  r~  r  c                      t        j                         } t        j                  j                  j
                  j                  | dd        | j                         S )NrA  )save_dir)ioStringIOrZ   _dynamorepro	after_aotsave_graph_reprogetvalue)fdr   r   s    rR   log_graph_runnablez2fx_codegen_and_compile.<locals>.log_graph_runnable  sJ    BMM))::B
T ;  ;;= rS   artifactc                      dddS )Nfx_graph_runnablestring)r   encodingr   r   rS   rR   <lambda>z(fx_codegen_and_compile.<locals>.<lambda>  s    +$! rS   c                               S rN   r   )r  s   rR   r  z(fx_codegen_and_compile.<locals>.<lambda>  s    13 rS   )metadata_fn
payload_fnr   z%szAFTER POST GRADT)include_strideinclude_devicecoloredinductor_post_grad_graphc                  ,     j                  ddd      S )NFT)print_outputr  r  )print_readable)r   s   rR   r  z(fx_codegen_and_compile.<locals>.<lambda>#  s    2#4#4!&tD $5 $ rS   )r  pt2_configs)extra_logging)	r   	shape_envrP  rQ  rR  rT  rV  r   is_const_graphz"AOT mode only supports C++ wrapper)r   r  rP  rQ  rR  rT  rV  r   r   
const_codeconst_modulelayoutc              3   @   K   | ]  }j                  |        y wrN   )doprint)r  rw  ps     rR   r  z)fx_codegen_and_compile.<locals>.<genexpr>h  s     %Nqaiil%Ns   r   r   rj  zWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
) check_lowering_disable_cudagraph)Rr   sleep_sec_TESTING_ONLYr  r   warningsleeprO   preserve_rng_stater	  r   r   copysyssetrecursionlimitmaxgetrecursionlimitr   r   r  rZ   _loggingr,   rI   r  fx_graphrH   r<   no_gradr%  set_fake_moder   fx_graph_transformedpost_grad_graphs_logr   r)  rJ   r`  r6  r  aot_inductoruse_runtime_constant_foldingr   r>   set_graph_handlerruncodegen_with_cpp_wrapperr7   CachedMetricsHelpergraph_outputsr/   hasattrrp   r.   r  r^   r   r   _check_triton_bf16_supportcompile_to_fncount_bytesnum_bytes_accessednode_runtimesnodes_num_elemrd  cudagraph_skip_dynamic_graphsr   disable_cudagraphs_reason	_inductorr   any_is_symbolicr   r   r  r   rY   r[   aot_compilationrf  r  device_node_mappingr   
get_deltas)&r   r   rM  rN  rO  rP  rQ  rR  r   rT  rU  rV  	sleep_secr  inductor_countersr  r#  r   const_graphr  r   rv   r   metrics_helperoutput_stridesr   compiled_fn	num_bytesr  r  rj  r   meta_valrc  r  rn  r  r  s&   ``                                  @@rR   r_  r_    s     222	?I9U

9		(	(	* T%b)!$Z0557 	c#"7"7"94@ALL&){z: ;J 	
	! 	'' 4 	( 	
 	
^,
 *.9	$ 	 ]]_ 	=(^<I	= __Y' 	'FGG((^< &&&%#'#' 	 * !$#0#6M6O2P"Q+	2 __Y' t	)L*
 t	 "&KJF//LL/=b/A,,+#%'% +%)=+A!-#' ((5 K&L(LL;OO%$/$H$H$JMJ	K "  .#!'!%9'=)#5%(E" %88:N$$U+ F		>*MO&&2 '(A$22 
8#C2 #$9#**:K:K$L MQR R +11 %%NCJJ<M<M%N N +11$7
8 +51#113;@;L;L;N8	>=**i7*%%6%&&.8& CCGG==--==~N"&K " 
"#'99==#= GG}4#-h#E#(??#8#8#H#H#R$*.))--t*LL;L!
" xG"%,I\+b"I%,IR.8?AGG5$$,&iF F]t	 t	 t	}T TD agg&G&G
 99T9TU GG5 "1"GG55"--/Z(+<<"F]t	 t	l iT T|	= 	=	 	^K K0F F]t	 t	 t	 t	}T Ts   C=Z+Y !Z+>BY-Z+0Z<AZ-Y:	AZH9Z	=Z		Z	Z	Z+(BZ	=ZZ	Z+ Y*	%Z+-Y7	2Z+:Z?ZZZZZZ(	$Z++Z4inputsc                 d   g }t        |       D ]  \  }}t        |t        j                        s!t	        |j
                  j                        sAt               5  ||v rt        |      r
	 ddd       et        |      s
	 ddd       z	 ddd       |j                  |        |S # 1 sw Y   xY w)z
    This function runs at compile time, and generates a list of indices for which we
    might need to do a copy to preserve alignment requirements.
    N)r   rY   rZ   r[   r)   r  r  rE   r+   r*   r   )r  rN  ids_to_checkra   r  s        rR   r  r    s     Lf% 5%.ell''(02 	 %%*;E*B		 	
 /u5	 	
 6	 	A), 	 	s   B&3B&&B/	r   )	constantsry  mutated_input_idxsmodel.device_indexru  r  ry  r  c          
           ddl m}	 t        j                  j                  rt        j                  |	|||||||      nt        d  fd}
|
S )Nr   )cudagraphify_impl)r  ru  rO  r   r  ry  r  c                     Et        j                  d      5  t        j                         5   |       d d d        d d d         |       S # 1 sw Y   xY w# 1 sw Y   xY w)Ncudagraphify)rO   rI  r  )
new_inputsr  cudagraphify_fnr  rN  s    rR   r  zcudagraphify.<locals>.run  sv    ** T..0T .eZARST T :&&	T T T Ts!   AAAA	AA&)torch._inductor.cudagraph_treesr  r   rd  cudagraph_trees	functoolspartial)r  rN  r  ru  rO  r   r  ry  r  new_cudagraphify_implr  r  r  s   ``         @@rR   r  r    sW    
 }}$$#++!%%#%%1	
 ,K' JrS   rw   c                     t        j                  | j                         | j                         | j                  | j
                        S )z1
    Copy and input while preserving strides
    )r  r  )rZ   empty_stridedr_   r^   r  r  )rw   s    rR   static_inputr    s/     qvvx177188TTrS   dstsrcc                 V    t        | |      } t        ||      }| j                  |       y)z=Index into expanded dimensions of both dst and src then copy_N)rj   copy_)r   r  rc   s      rR   index_expanded_dims_and_copy_r    s'     c=
1C
c=
1CIIcNrS   c                   	
 t        |      }t        |      t        ||       t        |t              sJ t        |      D cg c]  \  }}|vrt        |      ng  c}}t        |      D cg c]@  \  }}t        |t        j                        s|n|vrt        |      n|j                         B c}}t        t        |            D ]8  \  }\  }}t        |t        j                        s$|vs)t        |   ||       : t        j                  j                          t        j                  j                         }|j!                  t        j                  j#                                t        j                  j%                  |      5   | t	                     ddd       |j                          t        j                  j#                         j!                  |       t        j                  j                          t        j                  j'                         
t        j                  j)                  
|d      5   | t	                    ddd       t        t        t*        f      sft,        j.                  r

fd}n0t1        t3                    D cg c]	  }|vs| c}		
fd}t5        ||      S c c}}w c c}}w # 1 sw Y   :xY w# 1 sw Y   xY wc c}w )zQ
    Assumes inputs[static_input_idxs[i]] are always the same memory address
    Nthread_local)streamcapture_error_modec                 ^   t              t        |       k(  sJ t        t        |             D ]Y  \  }\  }}}t        |t        j
                        s%|v r$|j                         |j                         k(  rKJ t        |||       [ | j                          j                          	S rN   )
rp   r   rr   rY   rZ   r[   data_ptrr  clearreplay)
r  r   r   r  rc   r   inps_expanded_dimsrN  static_inputsstatic_outputss
        rR   r  zcudagraphify_impl.<locals>.run=  s    }%Z8882;M:/AB3 K..c3 "#u||4--<<>S\\^;;;
 2#sMJK LLN!!rS   c                     D ]  }|   }t        |   | |   |        | j                          j                          S rN   )r  r  r  )r  r   rc   copy_indicesr   r  r  r  s      rR   r  zcudagraphify_impl.<locals>.runT  sQ    #  23 7-!#&
3
 LLN!!rS   )r  rG   rB   rY   ro   r   rb   rZ   r[   r  detachrr   r  r   synchronizeStreamwait_streamcurrent_streamr  	CUDAGraphr   r   r   size_assertsr\   rp   r@   )r  r  rN  check_input_idxsr   rw   rc   r  r  r  r   r  r  r  s     `      @@@@@rR   r  r    s    /v7HI3F<MN6#34fd###  'C !$+< <!"D  ' C	 !U\\* 	
 '' !_XXZ		M $-S9K-L#M Paa&36G+G)-*<aOP
 
JJZZ F
uzz0023			6	" #d=!"#
	JJ++F3	JJ JJ  "E			%>		R 4tM234ntUm4(*	" 	"( !]!34
CT8TC
	" 	" (-=>>S&# #4 44
s1   K8AKK#K01	K<;K<#K-0K9model_example_inputs_inner_compiler5  c           
         |ddini |ddi}d|vr5t         j                  j                  si |dt        | j                        i}|j                  dd       }t        j                  d      5  t        | |t        j                  |d|      |      }t        j                  j                  |      s
J d|        |cd d d        S # 1 sw Y   y xY w)NrQ  Tzaot_inductor.output_pathrV  )rR  rV  )r  r5  z/AOTInductor compiled library does not exist at )r   r  output_pathr   codepoprI   set_aot_compilation
compile_fxr  r  ospathexists)r  r  r  r5  rV  compiled_lib_paths         rR   compile_fx_aotr'  a  s     ! 
44t4  	#.8##//

&	&++(>

 ,//0H$O	
		t	$ !&#++'=
 *	
 ww~~
 	Q<=N<OP	Q 
 !! ! !s   /AC		Caot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsforward_devicec                    ddl m}m}	 t        |        t	        j
                  | d      }
|
rt        | |d        ||         |	|| |      \  }D cg c]  }||   	 }}t              |z
  }t        |      }|j                  j                  ^ }}|j                  d   }t        j                  d |D              }t        t        |            }t         j"                  j$                  j'                         }|X|j(                  }|J t        t        |            D ]  }|vsd ||<    |j*                  r||j*                  j,                  z  }t.        j0                  j3                  |dd      5   ||||||d||
|	      d d d        t4        j6                  du rS fd}d|_        |S c c}w # 1 sw Y   1xY w)	Nr   )%convert_conv_weights_to_channels_lastfreezeTr  c              3   ~   K   | ]5  }t        |t        j                  j                        s(|j                   7 y wrN   rY   rZ   rh  ri  r   r  r   s     rR   r  z'fw_compiler_freezing.<locals>.<genexpr>  s*      )Auxx}})E)   )==r  )rN  rM  rP  r   rS  rU  rT  c                 b    D cg c]  }| |   	 }}| j                           |      S c c}w rN   )r  )r   ra   args_newoptimized_functionpreserved_arg_indicess      rR   wrapperz%fw_compiler_freezing.<locals>.wrapper  s3    %:;DG;;

!(++ <s   ,)torch._inductor.freezingr.  r/  r   r>   decide_layout_optr%  rp   r   r   r   r   dictfromkeysro   r\   rZ   r|   r}   r~   params_flatr   r   r   r  r!  rI   r  r  )r(  r)  r*  r+  r  rM  rP  r,  r.  r/  rU  	opt_modelindr   r#  rv   model_outputs_nodemodel_outputsrT  rN  tracing_contextr=  ra   r8  r6  r7  s                           @@rR   fw_compiler_freezingrC    s    W ""45001CRVWJ+-?F-.@A'-($I$ >SSc,S1SS)*-??I !34I '__22Q&++A.M== )%)  U9-.mm22::<O"%11&&&s;'( 	&A--!%A	& &&!<!<!Q!QQ			9&=t	D 
*/!'5!!5


 	D !!,
 GNe T2
 
s   F47F99Gdecompositionsc                 \    t        t        j                        5  	 |rPt        j                  |      5  t         | t        j                  |            |      cd d d        cd d d        S t        j                  rt        j                  dt        j                         rt        j                  j                  n"t        j                  j                  dd      dk(  dddd      5  t        j                  |      5  |}t         t         j"                  j$                        r j&                  j(                  D cg c],  }|j*                  dk(  r|j,                  j                  d      . }}t/        d	 |D              rbt1        t3               ||      D ]H  \  }}	}
|	j4                  |
j4                  k7  s!t7        d
| d|	j4                   d|
j4                   d       |}t         |t9        j:                  d      |      cd d d        cd d d        cd d d        S t9        j:                  t
        |      }t=               st?         ||      cd d d        S t         t         j"                  j$                        rFt         j&                  j@                  tB              rtE         ||      cd d d        S tG         |       tI        d |D              rtK         ||      cd d d        S t        jL                  rJ tO        |      tQ        t        j                  jR                        tU        d       tW        tX              ||n	t[               }dt         j"                  j$                  dt\        t         j^                     dt`        ffd}dt         j"                  j$                  dt\        t         j^                     dt`        f fdt9        j:                  |d      }t        jb                  r5t!        jd                         s!t9        j:                  tf               }nt9        j:                  |d      }d }dt         j"                  j$                  dt\        t         j^                     ffd}ti        |      xs  t         jj                  jm                  d      }t         jn                  jp                  js                         xs t         jn                  jq                  |      }t        jt                  du rtw        j                  d      5  ty         |d|      \  }}d d d        t{               }d j,                  v r j,                  d   |j,                  d<   t         j|                  j                         }|rt         j|                  j                  nt        j                  }t        j                  |      5  t        j                         5   |       5   |||      cd d d        cd d d        cd d d        cd d d        S t        j                  |      5  t         jn                  j                  |      5  t        j                         5  tw        j                  d      5   t        |||||d       |      cd d d        cd d d        cd d d        cd d d        cd d d        S # 1 sw Y   ixY wc c}w # 1 sw Y   nxY wd d d        # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        # 1 sw Y   xY w# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)N)r  rD  F-TORCHINDUCTOR_TRITON_AUTOTUNE_AT_COMPILE_TIME1T)rQ  ztriton.autotune_at_compile_timeztriton.autotune_cublasLtztriton.cudagraphsztriton.store_cubinr   r   c              3   $   K   | ]  }|d u 
 y wrN   r   )r  vs     rR   r  zcompile_fx.<locals>.<genexpr>  s     >Q1D=>s   zBDevice mismatch between fake input and example input at position #z: z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.)rQ  c              3   R   K   | ]  }t        |t        t        t        f       ! y wrN   )rY   ro   r   r;  )r  rw   s     rR   r  zcompile_fx.<locals>.<genexpr>.  s     Kaz!dE401Ks   %'r  r   r   c                 n    t        j                  d      5   | ||      cd d d        S # 1 sw Y   y xY w)Nz$compile_fx.<locals>.fw_compiler_base)rO   rI  )r  r   r   _fw_compiler_bases      rR   fw_compiler_basez$compile_fx.<locals>.fw_compiler_base@  s9    
 **+QR N(MN N Ns   
+4c           
      B   |rt        |        t        j                  j                  j	                  t        |            }i }t        j                  r3t        |       }t        j                  |j                   }t        |      }t        j                  j                  j                         }|%|j                  r|s|j                  j                   }	nd}	t#        t        j$                  j&                        rXj(                  j*                  ^ }
}|j,                  dk(  sJ t        j.                  |j                        \  }}
t        |      }n|}||k  sJ |	|z   }||k  sJ t0        j3                  d ||	| D              } | |t5        |      ||      S )Nr   rs  c              3   |   K   | ]4  }t        |t        j                  j                        r|j                   6 y wrN   r1  r2  s     rR   r  z8compile_fx.<locals>._fw_compiler_base.<locals>.<genexpr>  s0      5 "!UXX]]3	 FF5s   :<)rN  rM  rP  r   rS  rT  )r   rZ   r  r   num_fw_fixed_argumentsrp   r   keep_output_striderF   r   arg_tree_leavesr   r|   r}   r~   r   num_mutated_inp_runtime_indicesrY   rh  GraphModuler   r   r   tree_flattenr;  r<  r   )r  r   r   r   rT  r@  rA  num_model_outputsr   original_output_start_indexrv   orig_model_outputs_nodeorig_model_outputsnum_orig_model_outputsorig_output_end_idxrM  r,  rP  r  r  r+  s                  rR   rL  z%compile_fx.<locals>._fw_compiler_baseH  s   
 -e4OO))@@"C$7E $& ((%0%7" & 6 68J8O8O P$'$6!--66>>@&7+>+>|++KK 0 34/fehh&:&:;28,,2D2D/Q/255AAA,2,?,?/44-)& .11C-D*->*-1BBBB  02HH $
 +.????'+}} 5*34G5 ($ !"7">%!)+9%9	 	rS   r  )r*  r+  r  rM  rP  r,  c                 :    t        |        t        | |fi |ddiS )NcompilerrA  )r   r   )r   joint_inputsrK  s      rR   partition_fnz compile_fx.<locals>.partition_fn  s,    )%06|'-8B rS   c                 ^   t        j                  d      5  i }t        j                  rCt	        |       }t        j                  |j                   }t        j                  d |D              }t        |       } 	| |t        t        |            d|      cd d d        S # 1 sw Y   y xY w)Nzcompile_fx.<locals>.bw_compilerc              3   ~   K   | ]5  }t        |t        j                  j                        s(|j                   7 y wrN   r1  r2  s     rR   r  z2compile_fx.<locals>.bw_compiler.<locals>.<genexpr>  s*      9#$Auxx}}9U9r3  T)rN  rM  rO  rP  rS  rT  )rO   rI  r   bw_outputs_user_visiblerF   r   rR  r   r;  r<  r&   ro   r\   )
r  r   rT  r@  rA  r   rM  r,  rP  r  s
         rR   bw_compilerzcompile_fx.<locals>.bw_compiler  s     **+LM ')$11)4U);&$*$:$:<N<S<S$TM+/== 9(59 ,( 'u-$"&*5<&8) $%/=)=	  s   BB##B,r  )unlift_effect_tokens)trace_jointrD   dynamo_flat_name_to_original_fqn)fw_compilerrc  inference_compilerrD  r_  keep_inference_input_mutationsrM  )Hr4   rG  rH  r   r  r"  rQ  r)  rd  autotune_at_compile_timer#  environr  rI   set_real_inputsrY   rZ   rh  rT  r   r   r   r   rg  rr   r   r  
ValueErrorr  r  graph_returns_tuplemake_graph_return_tuple_codegenr5   handle_dynamo_export_graphr   r  r   _raise_error_for_testingrp   r%   rM  r   r  _graph_counterr9   r   r[   boolfreezingis_grad_enabledrC  r   r  r  r|   r}   r~   r  functorch_configr   r   _C_is_any_autocast_enabled_DisableAutocastr  r  r  r   rc  tracingr3   )r  r  r  r5  rD  inputs_r   fake_inputsr   fira   recursive_compile_fxrM  rg  rh  r_  rc  r#  rB  r   r   r   disable_ampr   rL  rM  r,  rP  r+  s   ` `                     @@@@@rR   r"  r"    s    
  C C	D X'9n- !#">&,,~">}"M#1 X' X' #( '') 8>}}7]7]G  16).*. &   1&  *fehh&:&:; %+LL$6$6# 77m3 		e,#K #
 >+>>*-eg{G*L "JCQ!yyAHH4&0&hilhmmo')yykahhZ @o%o'" !"" #.!"+"3"3Mt"T#1	C& & &X' X'j  )00') 
 #6**$yX' X'D fehh223&,,//@1#(KX' X'V 0HFK?KK'$]X' X'h 2222 1v}}778
)$/' -8N>Q>S 		N88''	N .	N 	NN	88''N	 .N	 N	 N	`  ''(8uM??5#8#8#:!*!2!2$##5+%!-" "+!2!23CRV!W		88''	9=ell9K	 	8 %
 J--D-I 	 MM((002 7}}++I6 	
 $!''TB &7# %#1	'#O (ODK1V[[@GM{{6H  !CD  ((;;=K-8))j>T>T  + H->-F-F-H H') H)+GH H H HQX' X'V __Y' 	')>)>*
 	'$$&	'(8(>(>!%)
	'
<''#5-)/3% o'	' 	' 	' 	' 	'WX' X' 8#%& & & & &H .H H H H H H H H	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	'WX' X' X'sx  ^"(["	^"5A;^"0[<?[&1[!
6A	[& A[&	[<	^"+3^"(A^"*^":H6^"0\	B^"!]6\+>	\		\+	]	^",^" ^!]86]#	]&	]#	/	]88	^	^"[	^"![&&[/+[<2
^"<\	^"	\	^"\\+"	]+\40]7
^"]	^"]]#		]8#],(]8/	^8^=^	^"^	^""^+c                    t        | t        j                  j                        syt	        |       j
                  \  }t        |t        t        f      ryt        |t        j                  j                  j                        rst        |j                  d      r]t        |j                  j                  j                        dkD  r1t        d |j                  j                  j                  D              ryy)z"True if a FX graph returns a tupleT_schemar6   c              3   L   K   | ]  }t        |j                        d k(    yw)r[   N)r`  r  )r  rets     rR   r  z&graph_returns_tuple.<locals>.<genexpr>  s     OcCHH)Os   "$F)rY   rZ   rh  rT  rF   r   ro   r   r   ri  r  r   rp   r  returnsrg  )r   rvs     rR   rn  rn    s    b%((../O  ER"tUm$2uxx}}))*BIIy)		!!))*Q.ORYY5F5F5N5NOO rS   
compile_gmc                    t        |       }|j                  \  }t        j                  |      \  }| j                  j                  |      5  | j                  j                  |       ddd       | j                  j                  |       t        |       sJ  || |      t        j                        fd       }|S # 1 sw Y   ZxY w)z
    Mutate gm so it returns a tuple.  This is only needed for graphs
    not created by torchdynamo that return non-tuples.
    Nc                  <    t        j                   | i |      S rN   )r   tree_unflatten)r   rK  r  specs     rR   r8  z(make_graph_return_tuple.<locals>.wrapper!  s     $$[$%A&%A4HHrS   )rF   r   r   rU  r   inserting_beforers  r   rn  r  wraps)r   r  r  r   r  r8  r  r  s         @@rR   ro  ro    s     r?DIIER""2&HB		"	"4	( 
HHr"""R(K__[!I "I N s   CCc                 ,   | j                   j                  t        j                  j                   j	                         | j                   _        | j                           ||  j                  |       t        j                        fd       }|S )z
    `torch._dynamo.export` embeds pytrees in the FX graph codegen object,
    convert that to a normal FX graph so inductor can compile it.
    c                  F    j                    j                  |         S rN   )process_outputsprocess_inputs)r   codegenr  s    rR   r8  z+handle_dynamo_export_graph.<locals>.wrapper7  s'    &&{4JG4J4JD4Q'RSSrS   )	r   rp  rZ   rh  CodeGenr   r  r  r  )r   r  r  r8  r  r  s       @@rR   rq  rq  (  sx     hhG..0BHHLLNR!7!7!7!@AK__[!T "T NrS   r   c                 X   dd}| j                   j                         D ]}  } t        |dd              }t        |j                        r!|j                         t        j                  k7  rMt        |j                        }|j                  d      r y  ||        | j                  D ]}  } t        |dd              }t        |j                        r!|j                         t        j                  k7  rMt        |j                        }|j                  d      r y  ||        y )Nc                     ddl m} t        | j                        }|j	                  |       }t        j                  |j                   d        |d      )Nr   )	SkipFramez9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excr  r   r  get_device_propertiesr   r   r   )r  r  device_interfacedevice_propss       rR   warn_and_skipz1_check_triton_bf16_support.<locals>.warn_and_skip?  sP    /3FKK@'==fE  !!Z[	
 /00rS   
get_devicec                  ,    t        j                  d      S Nr   rZ   r  r   rS   rR   r  z,_check_triton_bf16_support.<locals>.<lambda>J      ELL4H rS   F)including_emulationc                  ,    t        j                  d      S r  r  r   rS   rR   r  z,_check_triton_bf16_support.<locals>.<lambda>U  r  rS   )rd   N)graph_inputsvaluesr   r)   r  	get_dtyperZ   bfloat16r   is_bf16_supportedr  )r   r  inpr  r  r   s         rR   r  r  >  s    1 !!((* 	Il,HIKv{{#5>>(I 4FKK@--%-Hf	 "" 	Il,HIKv{{#5>>(I 4FKK@--%-Hf	rS   )F)NNrN   )NNFNFFFNNNN)
NNFNFFFNNN)r   )r  r  r  r   r   r#  r  r  r   r   typingr   r   r   r   r   r	   r
   r   unittestr   torch._inductor.async_compilerZ   torch.fxtorch.utils._pytreer   _pytreer   functorch.compiler   torch._dynamor   r   rG  r   rO   torch._dynamo.device_interfacer   torch._dynamo.repro.after_aotr   torch._dynamo.utilsr   r   r   r   torch._functorchrw  torch._functorch.aot_autogradr   r   torch._inductor.codecacher   r   r   r   rf  r   r   r    r!   r"   torch._inductor.debugr#   %torch._inductor.runtime.runtime_utilsr$   torch._inductor.utilsr%   r&   r'   r(   r)   r*   r+   torch._loggingr,   
torch._opsr-   %torch.fx.experimental.symbolic_shapesr.   r/    torch.fx.passes.fake_tensor_propr0   torch.monitorr1   _dynamo.backends.commonr3   fx._lazy_graph_moduler4   fx.graphr5    r7   r  r8   decompositionr9   fx_passes.joint_graphr:   fx_passes.post_gradr;   r<   fx_passes.pre_gradr=   r   r>   irr?   r@   rA   rB   rC   rD   rE   rF   rG   rH   virtualizedrI   r)  torch._inductor.fb.utilsrJ   rK   r`  	getLogger__name__r   r  getArtifactLoggerr  r  r  rb   r[   intrj   rt  rz   r   	lru_cacher   r   r   r   r   r   r   rh  rT  ri  r   r	  r  r%  r2  r6  contextmanagerr;  r=  rJ  r_  r  r  r  r  r  r'  rs  rC  r"  rn  ro  rq  r  r   rS   rR   <module>r     s     	   	 
    N N N  $  $ $ A  D =  8 L   A ;   , ! W ; & 2 : %   . 5 B /    
 
 
  6KK%3 % g!00<H~~77BTU NN44' M5<< S	 ell ell t &
< T/ / T	
 	
GT%*/'$ ' 26FJ?(?(tCH~.?( #8UXX]]OT,A#BC?( 588c3h/0	?(D588#7#7 &(U\\8J (& ).& "&8W,(DcN (
  
* 23 '+-1"=A6:!%PTTT&T #T  S	*	T
 T smT T T T !))9 :T #4T	?3T T %Xt4D/E.F.K%LMT ?C T 4Tt '+-1" 7;!%PTjj&j #j  S	*	j
 j smj j j j #4T	?3j j %Xt4D/E.F.K%LMj ?C jZ O }  c] J (** +-.0*,*CH*}* 	*
 x}%* * * U\\3&'* ?+* c3h* c3h*ZUELL UU\\ U		 9 (*W?CHW?W?  }W?z )9/3	#!HH  #!%,,'#! CH%#! T#s(^,	#!L qM,,MU\\*M ((&&M 	M
 CH%M M M %Mf )9/3EI_'HH  _'%,,'_' CH%_' T#s(^,	_'
 T*hsCx.@"@AB_'D	EHH00 $ c"4 c",m  rS   