
    sg>             	          d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlZd dlZd dlZd dlm Z m!Z! d dl"m#Z# d d	l$m%Z%m&Z& d d
lm'Z'm(Z( d dl)m*Z* d dl+m,Z, d dlm-Z- d dl.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z? d dl@mAZA d dlBmCZC ddlDmEZEmFZF ddlGmHZHmIZImJZJmKZKmLZLmMZM ddlNmOZOmPZPmQZQmRZR ddlFmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZm[Z[ ddl\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZd ddlemfZf ddlgmhZh ddlimjZjmkZkmlZlmmZmmnZnmoZo ddlpmqZqmrZr erd d lsmtZt dd!lumvZv d d"lwmxZx  ej                  ez      Z{ej                  j                  ezd#      Z~ej                  j                   Z ej                         Z eEj                         rd d$lmZ n
d%ed&ed'dfd(Zd)ej@                  d*ed'efd+Zd,ej8                  d'eej                     fd-Zd.ed'efd/Zd0e-d1ed'ee!ej                  j                  e-f   fd2Zd3e;d4eeedf      d'dfd5Z G d6 d7ej                   j"                        Zy)8    N)defaultdict)contextmanager)
ModuleType)AnyCallableDefaultDictDictIterableListNoReturnOptionalSequenceTupleTYPE_CHECKINGUnion)Expr)deviceTensor)get_decompositions)defakedynamo_timed)
LazyStringtrace_structured)make_channels_last_strides_for)
FakeTensor)GraphModule)BackwardStatemagic_methodsmethod_to_operator)free_unbacked_symbolshas_free_symbolsresolve_unbacked_bindingsRuntimeAssertShapeEnvSymTypes)Graph)Node)no_dispatch)
OrderedSet)int_oo   )configir)BackendFeatureDeviceOpOverridesget_backend_featuresget_device_op_overridesget_wrapper_codegen_for_deviceinit_backend_registration)CppWrapperCodeGenErrorLoweringExceptionMissingOperatorWithDecompMissingOperatorWithoutDecomp)	ConstantFixedLayoutget_device_typeInputBuffer	Pointwise	Reduction
StorageBox	TensorBoxTorchBindObject)FALLBACK_ALLOW_LISTfallback_handler%fallback_node_due_to_unsupported_type	loweringsmake_fallbackmaybe_layout_constraintsneeds_realized_inputsunsupported_output_tensor)BaseSchedulerNode)SizeVarAllocator)convert_shape_to_inductorgather_origins get_cloned_parameter_buffer_nameget_sympy_Expr_dtype#maybe_get_suppress_shape_guards_ctxshould_assume_input_aligned)NullHandlerV)_EffectType)WrapperCodeGen)output_code_log
perf_hints)log_module_codeargskwargsreturnc                       y N )rY   rZ   s     H/var/www/html/venv/lib/python3.12/site-packages/torch/_inductor/graph.pyrX   rX   {   s        dtypecudac                    t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  h}|r||j                  t         j                         |j                  t         j                          |j                  t         j"                         |j                  t         j$                         | |v S r]   )torchfloat32float64int64int32int16int8uint8boolbfloat16	complex32	complex64
complex128float16addfloat8_e4m3fnfloat8_e5m2float8_e4m3fnuzfloat8_e5m2fnuz)ra   rb   supported_dtypes      r_   supported_dtype_of_cpp_wrapperrx      s    



O E//0E--.E112E112O##r`   constant_bufferc                    t        | t        j                  t        j                  t        j                  j
                  j                  f      sJ d       t        | t        j                  j
                  j                        rt        j                  S t        | t        j                        rt        |       S | j                  rt        j                  S | j                  rt        j                  S y )Nzgget_constant_buffer_dtype only supports input of sympy.Symbol, sympy.Expr or sympy.core.numbers.Integer)
isinstancesympySymbolr   corenumbersIntegerrd   rg   rO   
is_integeris_floatre   )ry   s    r_   may_get_constant_buffer_dtyper      s    %,,

EJJ4F4F4N4NO qpq  /5::#5#5#=#=>{{/5::.#O44!!{{		!	!}}r`   opc                 L    t         D ch c]  }t        |       }}| |v S c c}w r]   r   )r   m	magic_opss      r_   is_magic_methodr      s*    0=>1#A&>I>? ?s   !objtargetc           	          |j                  d      }| }t        |      D ]=  \  }}t        ||      s t        ddj	                  |d |              t        ||      }? |S )N.z#Node referenced nonexistent target )split	enumeratehasattrRuntimeErrorjoingetattr)r   r   target_atomsattr_itriatoms         r_   getattr_recursiver      sv     <<$LH\* +4x&5chh|BQ?O6P5QR  8T*+ Or`   guser_visible_outputsc                    t         j                  syt        j                  t        j                  h}t        j
                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                  t        j                   h}dt"        j$                  j&                  dt(        t"        j*                  j,                     fd}t/        | j0                        D ]  } ||      }|s||v rd|j2                  d<   |j2                  j5                  dd      r0|j6                  D ]!  } ||      }|s||vsd|j2                  d<   # t         j8                  r~|s|j:                  |v sd|j2                  d<    y)a  
    Nodes like convolution/convolution_backward want its input to be dense.
    If we pad their inputs, we result in extra calls to copy kernels!  On the other hand, padding usually helps reduction.

    The pass finds nodes that dislike padding. These are nodes that can be reached
    from a convolution/convolution_backward in the backward direction without
    going thru a reduction.
    Nnoder[   c                     | j                   dk(  rZt        | j                  t        j                  j
                        r,t        | j                  d      r| j                  j                  S d S )Ncall_function_overloadpacket)r   r{   r   rd   _ops
OpOverloadr   r   )r   s    r_   _get_overload_packetz8mark_nodes_dislike_padding.<locals>._get_overload_packet   sU    
 ww/)4;;

(=(=>%67	 KK''	
 	
r`   Tdislike_paddingF)r-   comprehensive_paddingatenconvolutionconvolution_backwardvar_meansummeanprodanyaminamaxminmaxargminargmaxscatter_reducerd   fxr(   r   r   OpOverloadPacketreversednodesmetagetall_input_nodespad_outputsname)	r   r   ops_dislike_paddingops_like_paddingr   curr   priorprior_ops	            r_   mark_nodes_dislike_paddingr      sr    ''!! 									

hhmm

	%**--	.

   /!#&$$*.CHH&'88<<)51,, 9/6#3348EJJ019 ""$00*.CHH&'+/r`   c            !       v	    e Zd ZU eej
                     ed<   dej                  de	e
ee   ee   f   e
ee   ee   f   f   fdZdej                  de	eej                     eej                     f   fdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 dbdej                   j"                  d	eeej                        d
ee   dee   dededeeedf      dee   deeeej0                     gef      dededeeeef      dee   ded    dee   ddf  fdZde
ej6                  j                  j
                  ef   dedefdZedededefd       Z dedefdZ!dej                   j"                  d	eej                     dedd fdZ"de#e$   fdZ%deddfd Z&dej8                  ddfd!Z'e(dejR                  jT                  jV                  fd"       Z,d#edee
ejZ                  ej\                  f      fd$Z/d#ede
ejZ                  ej\                  f   fd%Z0d#edejb                  fd&Z2d#ede
eef   fd'Z3d(edef fd)Z4d*ejj                  defd+Z6dd,d-ej\                  d.edefd/Z7d0ee   defd1Z8d2e
e9ej
                     ej
                  f   ddfd3Z:deddfd4Z;dedej                  fd5Z<dee   d6e
e   defd7Z=	 dcd6edee   de-fd8Z>ded9eej8                     defd:Z?d;ed(e	e@   d<eee@f   de
ee-df   f fd=ZAd;ed(ed<eeef   def fd>ZBed?ej                  defd@       ZCd;ed(e	dA   d<eee@f   de
eDe-ej                  eFf   fdBZGd;ed(ed<edeHfdCZId;ed(ed<edeHfdDZJd;ed(e	e@   d<eee@f   ddf fdEZKdddFZLeMdGej                   jH                  fdH       ZNdIe
ejZ                  ej                  f   dJe	e
eej                  f   dKf   de
ejZ                  ej                  f   fdLZQdMej                   jH                  dNe	e   dOeeef   dPe	e   dQeeef   ddfdRZRdSej                   jH                  de@f fdTZSdddUZTdddVZUde	eee	ee$f      f   fdWZVde	eee	ee$f      f   fdXZWdedYZXde	eee	eYef      ee	eYeZf      f   fdZZ[ed[eddfd\       Z\de]fd]Z^de]fd^Z_defd_Z`dee   fd`ZadedefdaZb xZcS )fGraphLoweringgraph_outputsexr[   c                 2   | j                   r2t        |j                               t        |j                               fS ddlm}  |dt        | j                  j                               }| j                  j                  ||      \  }}}|D cg c]4  }t        |t        j                        r|j                  j                  n|6 }}|D cg c]4  }t        |t        j                        r|j                  j                  n|6 }}||fS c c}w c c}w )z
        Support dynamic shapes and dynamic strides by assigning variables
        to each dimension.  We duck-shape tensors, so if two tensors
        have the same size they get assigned the same symbolic variable.
        r   )ConstantSource__inductor_unknown_tensor_)reuse_shape_envrL   sizestridetorch._dynamo.sourcer   len
_shape_env
var_to_val,create_symbolic_sizes_strides_storage_offsetr{   rd   SymIntr   expr)selfr   r   sourcer   r   _r   s           r_   symbolic_sizes_stridesz$GraphLowering.symbolic_sizes_strides  s     ,RWWY79R		:   < $,S1K1K-L,MNF LL	 LPPaz!U\\:APPMSTAu||!<!&&++!CTTV| QTs   9D9Dc                     |j                         D cg c]  }t        j                  |       }}|j                         D cg c]  }t        j                  |       }}||fS c c}w c c}w )z+
        Primarily used to weights
        )r   r|   r   r   )r   r   r   r   r   s        r_   static_sizes_stridesz"GraphLowering.static_sizes_strides*  sZ     +-'')4Qa 44,.IIK8q%--"88V| 58s   A%A*NFgmexample_inputs	shape_envgraph_idcpp_wrapperaot_moder   
layout_optextern_node_serializeris_inferenceis_const_graphconst_output_index
const_codeconst_moduler   c                    t         |   |       || _        ||n| j                  ||
      | _        d| _        |
| _        || _        || _        || _	        d| _
        |t               }d| _        n|| _        d| _        || _        |j                          |j                  j!                         | _        t%               | _        t)        |      | _        g | _        i | _        i | _        t%               | _        |r|j4                  n	t%               | _        |r|j6                  n	t%               | _        d| _        g | _        g | _        |r|ni | _        |rt%        |jA                               n	t%               | _!        |r|jD                  ni | _"        i | _#        i | _$        t%               | _%        t%               | _&        t%               | _'        t%               | _(        t%               | _)        t%               | _*        d | _+        d | _,        g | _-        ddl.m/} ta        jb                         r|	r|	n|| _2        d | _3        i | _4        t%               | _5        g | _6        i | _7        tq        tr              | _:        i | _;        ty        jx                         | _=        || _>        || _?        || _@        i | _A        || _B        || _C        t        t              | _F        d | _G        | j                  r| j                         n	t%               | _I        dh| _J        ||ni | _K        t        |j                  |       d| _N        d| _O        g | _P        d | _Q        i | _R        |j                         | _T        | j                  j                  j                  di       | _X        ||j                  ni | _Y        t                 t        j                  d       t              | _]        i | _^        t%               | __        t%               | _`        t%               | _a        y )	N)r   r   FT)extern_node_json_serializerzaten.convolution_backward  dynamo_flat_name_to_original_fqn)bsuper__init__r   decide_layout_optr   num_channels_last_convr   r   r   r   extra_tracebackr%   r   r   freeze_runtime_assertsdeferred_runtime_assertscopyras_by_symbolr*   bound_unbacked_symbolsrK   sizevarsgraph_input_namesgraph_inputsgraph_inputs_originalzero_dim_cpu_tensor_listdevice_typesdevice_idxsrb   buffers
operationsr   keysfolded_constants	constantstorchbind_constantsconstant_reprsremoved_operationsremoved_buffersremoved_inplace_buffersmutated_buffersnever_reuse_buffersinplaced_to_remove
device_opswrapper_codeextern_kernel_nodes&torch._inductor.extern_node_serializerr   r-   	is_fbcoder   current_nodelistsmutated_inputsmutated_input_idxsname_to_bufferr   listname_to_users
name_to_optimecreation_timer   r   record_multi_kernel_choicemulti_kernel_to_choicer   r   next_post_grad_graph_counterpost_grad_graph_id	schedulerfind_nodes_prefer_channels_lastnodes_prefer_channels_last_warned_fallbackr   r   graph	cache_key
cache_pathcache_linemapdisable_cudagraphs_reasondevice_node_mapping__copy__orig_gmmoduler   r   r   allocated_constant_namer4   	functools	lru_cacher1   effectful_opsaligned_inputsno_fuse_buffer_namesall_codegen_kernel_names)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   	__class__s                    r_   r   zGraphLowering.__init__4  s   ( 	, % '''F 	
 '(#(,$($ 
I#(D 'DO#'D # 	((* ..335 	 AK#(3,.24=?"9C%)5L%%:< 	 )5L$$*, 	 	(*.0"4" 	
 " )..01 	 '3L"" 	 FH .03=<0:8B$0:4>L 3=<-1,0>@ V !&< #, 	# ,0+-
/9|-/46@KD@Q35!YY[	&
 +6'68#  "&'?"@>B6:ooD002:< 	' "= =$8$D " 	! 	#288-AB !  	 9=& GI -/[[]040@0@0D0D.1
- 5A4LL00RT 	$ 	"#$=I$7$7$=>R$S!;=/9|5?\! :D%r`   r   featurec                 h    t        |t              sJ |       || j                  t        |            v S r]   )r{   r/   r1   r;   )r   r   r1  s      r_   has_featurezGraphLowering.has_feature  s4     '>2;G;2$33OF4KLLLr`   c                8   t         j                  syt         j                  ry| j                  j                  D cg c]@  }|j
                  t        j                  j                  j                  j                  k(  s?|B }}t        |      }|dk(  ryt        j                  j                  j                  r;t        j                  j                  j                         rt!        d |D              ryt        t#        | j                  j                              d|z  k\  rt$        j'                  d       yt)        d |D              rt$        j'                  d       yd	t*        d
t,        fd}d	t        j.                  j0                  d
t,        fd}d	t        j.                  j0                  d
t,        fd}|rBddlm} t7        t8              }	|D ]  }
t        j:                  j<                  j?                  |
      \  }}}|r~ |d      5 }t@        jB                  5   |
j
                  |i | ddd       ddd       jE                         } ||
      rd}n ||
      rd}n ||
      rd}nd}|	|xx   |z  cc<   t$        j'                  d        d}d}d}d}tG        |	jI                               }|	d   |z  |	d   |z  z   |	d   |z  z   |	d   |z  z   }||k  }|st$        j'                  d||       |S t)        tK        ||            rt$        j'                  d       yt)        tK        ||            rt$        j'                  d       yt!        tK        ||            rt$        j'                  d       yyc c}w # 1 sw Y   UxY w# 1 sw Y   ZxY w)zl
        Decide if we should enable layout optimization for this graph based on
        heuristics.
        FTr   c              3      K   | ]G  }d D ]@  }|j                   |   j                  d   j                  t        j                  d      k(   B I yw)r   r,   valcpuN)rY   r   r   rd   .0nidxs      r_   	<genexpr>z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sT      !  s  '..%,,u2EEEs   AAi,  z*Skipped layout opt because only a few convc              3   t   K   | ]0  }d D ])  }t        |j                  |   j                  d          + 2 yw)r6  r7  N)r"   rY   r   r9  s      r_   r=  z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sE      

  QVVC[--e45
5
s   68zeSee perf regression with dynamic shape. Follow up in https://github.com/pytorch/pytorch/issues/102670r;  r[   c                     | j                   d   j                  d   }t        |t        j                        sJ | j                   d   dkD  xr |j                  d      dkD  S )Nr,   r7  )rY   r   r{   rd   r   r   )r;  meta_vals     r_   
is_groupedz3GraphLowering.decide_layout_opt.<locals>.is_grouped  sQ    vvay~~e,Hh55566":>:hmmA&6&::r`   c                    | j                   d   j                  d   j                  d      dz  | j                   d   j                  d   j                  d      k  xr. | j                   d   j                  d   j                  d      dkD  S )Nr,   r7  r      rY   r   r   r;  s    r_   is_in_out_channelz:GraphLowering.decide_layout_opt.<locals>.is_in_out_channel	  sv    q	u%**1-1QVVAY^^E5J5O5OPQ5RR 6FF1INN5)..q1A5r`   c                     | j                   d   j                  d   j                  d      dk  xr. | j                   d   j                  d   j                  d      dk  S )Nr,   r7  r   @   rE  rF  s    r_   is_small_channelz9GraphLowering.decide_layout_opt.<locals>.is_small_channel  sT    q	u%**1-3 8FF1INN5)..q1R7r`   )FlopCounterMode)displayNgroupedsmallin_outdefaultzConv inputs meta not foundg|?5^?gtV?g333333?guV?zhSkipped layout opt in inference because weighted flops indicate slowdown, default: %d, channels last: %dzFSkip layout opt because found grouped convolution with >1 in_channels!zBSkip layout opt because some convolutions have smaller out_channelz>Skip layout opt because all convolution channels are too small)&r-   layout_optimizationforce_layout_optimizationr   r   r   rd   opsr   r   rP  r   backendsmkldnnenabledis_availableallr  logdebugr   r   rl   r   r(   torch.utils.flop_counterrK  r   float	_inductorfx_utilsget_fake_args_kwargsrS   	fake_modeget_total_flopsr   valuesmap)r   r   r;  
conv_nodesnconvrB  rG  rJ  rK  flop_countsr   successrY   rZ   flop_counter_modecounted_flops	node_typeGROUPED_MULTIPLIERDEFAULT_MULTIPLIERIN_OUT_MULTIPLIERSMALL_MULTIPLIERtotal_flopsweighted_flopsdo_layout_opts                           r_   r   zGraphLowering.decide_layout_opt  so    ))++ xx~~
UYY^^5O5O5W5W)WA

 
 JA: NN!!))%%224 #  
 tBHHNN#$e3IIBC 

 

 IIw 	;# 	;$ 	;
	 	4 		 	$ 	 @,7,>K" <(-(@(@(U(U)%v (7 9;L[[ 9'DKK8899 %6$E$E$GM!$'$-	)$/$+	*40$,	$-		*m;*II:;-<6 "'!& %$k0023K I&);;g&)99:h'*;;< i(+==>  +k9M 		~"
 ! & s:z*+IIX  s$j12IIT  s#Z01IIVWm
D9 99 9s0   A M=<M=NN/NNNN	c                 @    | j                   | j                    d| S |S )z2Prepend the given name with the graph name if any.r   )r   r   r   s     r_   qualify_namezGraphLowering.qualify_namew  s&    99 ii[$((r`   subgraph_namec                     t        ||| j                  | j                  | j                  | j                  | j
                  | j                  |            S )a  
        Make a subgraph of the current graph with all inherited
        parts, except the graph module (`gm`) and `example_inputs`.
        The subgraphs are lowered separately, but intended to be
        inlined in the parent graph's codegening. Hence the need
        for maintaining the same `shape_env` and other properties.
        The subgraph name is qualified by the parent graph's name.
        )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   rt  )r   r   r   ru  s       r_   make_subgraphzGraphLowering.make_subgraph}  sN     )oo((]]#'#>#>**""=1	
 		
r`   c                    t               }t        | j                  j                  j                        D ]w  }|j
                  t        j                  j                  j                  j                  k(  r|j                  |       P|j                  D ]  }||v s|j                  |        w y | j                  j                  j                  D ]"  }||v s|j                  |j                         $ |S )aC  
        The rule to decide if an node prefer channels last is simple.
        1. if it's input/output of a convolution
        2. if one of its user prefers channels last

        We have rule 1 because cudnn runs a faster convolution kernel for channels last inputs;
        Rule 2 is also important. It makes sure that indirect inputs to convolution also prefers
        channels last.

        Consider the scenario: conv -> batch-norm -> relu -> conv
        Without rule 2, batch-norm output may use a contiguous layout. That will cause 2 extra copies:
        1. the output of batch-norm should be channels last initially since its input is a conv's output.
           Forcing the batch-norm's output to be contiguous results in the first copy
        2. The second conv's input is initially contiguous. This layout is propagated from the batch-norm's output.
           We need convert it to channels last layout which results in the second copy.
        With rule 2, we makes sure all the tensors in the chain uses channels last layout. So both copies
        can be saved.
        )r*   r   r(  r   r   r   rd   rS  r   r   rP  rr   usersupdate)r   
output_setr;  users       r_   r  z-GraphLowering.find_nodes_prefer_channels_last  s    & (2|
$++++112 	Axx599>>55===q! :%NN1%	0 ""(( 	+AJ!!!''*	+ r`   c                     || j                   vr2| j                   j                  |       t        j                  d|       y y )NzUsing FallbackKernel: %s)r  rr   perf_hint_loginfors  s     r_   warn_fallbackzGraphLowering.warn_fallback  s:    t,,,!!%%d+94@ -r`   c                 R   | j                   j                  |j                         |j                  %| j                  j                  |j                         t
        j                  j                  r7|| j                  vr(t
        j                  j                  | j                  |<   y y y r]   )	r   rr   typeindexr   rS   r   r  r%  )r   r   s     r_   add_device_infozGraphLowering.add_device_info  sy    fkk*<<#  .77F$2J2J$J/0ww/C/CD$$V, %Kr`   c                 "    t         j                  S r]   )rS   r`  r   s    r_   r`  zGraphLowering.fake_mode  s    {{r`   buffer_namec           	         || j                   v r| j                   |   S || j                  v r| j                  |   S || j                  v ryt        j                  j                  |   }t        j                  |t        j                  |j                  |j                  gt        j                  j                  |             S y r]   )r  r   r   rS   r   r.   ConstantBufferr:   r   ra   r   )r   r  datas      r_   try_get_bufferzGraphLowering.try_get_buffer  s     $---&&{33$+++$$[11$..(77$$[1D$$KK./gg.J.J4.P  r`   c                 H    | j                  |      }||S t        d|       )Nz$Failed to find buffer matching name )r  r   )r   r  bufs      r_   
get_bufferzGraphLowering.get_buffer  s/    !!+.?JA+OPPr`   c                    || j                   v r| j                   |   j                  S || j                  v r| j                  |   j                         S || j                  v r| j                  |   j                         S t        j                  d|      }|r | j                  |j                  d            S t        d|       )Nz1(as_strided|reinterpret_tensor)\(([a-zA-Z0-9_]+),r,   could not find )	r   ra   r  	get_dtyper   rematchgroupKeyError)r   r  r   s      r_   r  zGraphLowering.get_dtype  s    $..(>>+.444$---&&{3==??$+++$$[1;;==HHI;W>>!''!*--677r`   c                 `   ddl m} || j                  v r| j                  |   j                         S || j                  v r7| j                  |   }t        t        |dd       |      ry|j                         S || j                  v r| j                  |   j                         S t        d|       )Nr,   )MultiOutputLayoutlayoutr  )
r.   r  r   numelr  r{   r   	get_numelr   r  )r   r  r  r  s       r_   r  zGraphLowering.get_numel  s    )$..(>>+.4466$---%%k2C'#x68IJ==?"$+++$$[1;;==677r`   rY   c                 ^    t        d      5  t        |   | cd d d        S # 1 sw Y   y xY w)NzGraphLowering.run)r   r   run)r   rY   r0  s     r_   r  zGraphLowering.run
  s-    -. 	&7;%	& 	& 	&s   #,r   c                    |j                   
J d|        t        |t        j                        sJ | j	                  dt        | j                               }| j                  j                  |       || j                  |<   ||_         |S )NzOperation registered twice: r   )	operation_namer{   r.   	Operationrt  r   r   appendr  )r   r   r   s      r_   register_operationz GraphLowering.register_operation  s      (M,H*MM("bll+++  2c$//&:%;!<=r" " r`   set_namebufferr  c                l   | j                  dt        | j                               }| j                  j                  |       || j                  |<   t        |t        j                        r|j                         s/|j                         | j                  |j                                |r||_        |S )Nr  )rt  r   r   r  r  r{   r.   ComputedBufferis_zero_elements
get_devicer  r   )r   r  r  r   s       r_   register_bufferzGraphLowering.register_buffer  s      3s4<<'8&9!:;F#$*D! FB$5$566;R;R;T!!#/  !2!2!45FKr`   operation_namesc                 j    | j                  ddj                  |      z         }|| j                  |<   |S )Nlist_r   )rt  r   r  )r   r  r   s      r_   register_operation_listz%GraphLowering.register_operation_list&  s3      388O+D!DE*

4r`   node_outputc                      dt         t        t        j                     t        j                  f   dd f fd |       y )Nvaluer[   c                     t        | t        t        f      r| D ]
  } |        t        | t        j                        r4| j                         D ]   }j                  |   j                  |        " y y r]   )r{   r  tupler.   r@   get_read_namesr  r  )r  x	read_nameregisterr   s      r_   r  z1GraphLowering.register_users_of.<locals>.register.  sm    %$/  AQK %.!&!5!5!7 @I&&y188?@ /r`   )r   r
   r.   IRNode)r   r  r  s   ` @r_   register_users_ofzGraphLowering.register_users_of+  s:    	@E(299"5ryy"@A 	@d 	@ 	r`   c                     t        |t              sJ | j                  j                  |       || j                  vry| j                  |   D ]  }|j                           y)z
        When a buffer is mutated we need to make sure all the reads to
        the old version are realized before the mutation happens.
        N)r{   strr  rr   r  realize)r   r   r|  s      r_   mark_buffer_mutatedz!GraphLowering.mark_buffer_mutated8  s\    
 $$$$  &t)))&&t, 	DLLN	r`   c                     || j                   v r|| j                  v s
J d|z          t        | j                   |         }|| j                  j                  v r| j                  j                  |   S | j                  |   S )z
        In AOTI, module buffers may have been mutated during the tracing and compilation.
        Thus we need to read from previously stored original buffers, to make sure the
        generated model.so uses correct initial values.
        z$Can not find the original value for )r)  r   rN   r(  r   )r   r   	orig_names      r_   get_original_value_of_constantz,GraphLowering.get_original_value_of_constantF  s     t3338N 	
2T9	
N 5T5Q5QRV5WX	 DKK,,, KKY'	
 %	
r`   r  c                 x   |}t         j                  j                  s
| j                  j	                         D ]  \  }}|j
                  r|j                         |j                         k(  s5|j                         |j                         k(  sW|j                  |j                  k(  sq|j                  |j                  k(  s|j                         j                         |j                         j                         k(  s|j                         |j                         k(  s|c S  |dt        | j                         }|J |d   j                         rd| }| j                  |      }t!        j"                  dd|      }|}d}|| j                  v r| d| }|dz  }|| j                  v r|| j                  |<   |j                  d|j                  dt%        |j                               dt%        |j                               dt'        |      d	| j(                  |<   || j*                  |<   |S )	Nconstantr   	constant_z[^a-zA-Z0-9_]r   r,    r  )r-   aot_inductoruse_runtime_constant_foldingr   items	is_mkldnnr   r   ra   r   untyped_storagedata_ptrstorage_offsetr   isdigitrt  r  subr  hashr  r)  )r   r   r  r  constant_namer  prefixcnts           r_   allocate_non_dup_const_namez)GraphLowering.allocate_non_dup_const_nameV  s    	""??(,(<(<(> )$u		uzz|37

ekk1u||3,,.779,,.779:++-1E1E1GG(() <c$..123D7??tf%D  & (#t4dnn$XQse$D1HC dnn$  $t{{oQtzznATYY[!$AeDKKM&:%=QDz!n 	D!
 .7$$T*r`   c                     | j                  ||      }t        j                  t        j                  |t        |j                  |j                  g| j                  |                   S r]   )	r  r@   creater.   r  r:   r   ra   r   )r   r  r   new_names       r_   add_tensor_constantz!GraphLowering.add_tensor_constant  s]     33D$?DKKVd6O6OPT6UV
 	
r`   device_overridec                 X   | j                   |   j                  |k(  s||S t        j                  j                  j                         5  | j                  | d|j                   |j                  xs d | j                   |   j                  |            cddd       S # 1 sw Y   yxY w)z
        We AOT copy constants to the devices they are needed on.
        If device_override doesn't match the constant's device, then
        copy it and return a different name.
        Nr   r   )
r   r   rd   utils_python_dispatch_disable_current_modesr  r  r  to)r   r   r  s      r_   r  zGraphLowering.constant_name  s     >>$&&/9_=TK[[))@@B 	 33&/../0E0E0J/KLt$''8	 	 	s   	AB  B)r   rZ   c                    t         	|   |||      }| j                  j                  |       t	        |t
              r'|j                  j                  }|| j                  |<   |S t	        |t        t        t        f      r&t        j                  |      }|| j                  |<   |S |y t	        |t              ry t	        |t        j                         sJ |       |j"                  s| j%                  |      \  }}n| j'                  |      \  }}| j)                  |      }t+        j,                  t/        |t1        |j2                  |j4                  ||                  }|| j                  |<   |j6                  j6                  | j8                  |<   | j:                  j<                  r| j?                  |j2                         tA               5  tC        |      r| jD                  jG                  |       d d d        |S # 1 sw Y   |S xY wr]   )$r   placeholderr   r  r{   r&   r   r   r   intrl   r\  r|   sympifyr   rd   r   _has_symbolic_sizes_stridesr   r   rt  r@   r  r<   r:   r   ra   r  r   r  ry  r  rP   rQ   r-  rr   )
r   r   rY   rZ   exampler   sizesstridestensorr0  s
            r_   r  zGraphLowering.placeholder  s    '%fdF;%%f-gx(<<$$D(,Df%K#tU!34==)D(,Df%K_g}- '5<<09'90
 22!66w?NE7!88ANE7""6*!!GNNGMM5'J
 %+&!-3[[-=-=""6*""  0 12 	0*73##''/	0 	0 s   'G>>Hc                    |t         j                  u r/t        |d   t        t        t
        f      rt        |   |||      S t        |t        j                  j                        st        |d      r ||i |S |t        vrt        |t        j                  j                        s
J | d       |j                         j                  d      d   }|t         v rt#        |       nt$        j&                  rKt)        |g      rt*        nt,        }t.        j1                  d|j3                  |||             t#        |       n&t)        |g      rt+        |||      t-        |||      	 t.        j5                  dt        |          t        |   |i |}|S # t6        $ r-}t9        ||||      j;                  |j<                        d d }~ww xY w)Nr   _inductor_lowering_functionz is not an OpOverloadr   z"Creating implicit fallback for:
%sz  via %s)operatorgetitemr{   r  r  dictr   r   rd   r   r   r   rE   r   r   r   rB   rF   r-   implicit_fallbacksr   r7   r8   rY  r  operator_strrZ  	Exceptionr6   with_traceback__traceback__)	r   r   rY   rZ   	base_nameerrorouter0  s	           r_   r   zGraphLowering.call_function  s   X%%%*T!WtUD>Q*R7(v>> &%**"="=>71D
 4*6**"

-- 0./0  ++C03I//f%** *6(3 .5 
 9&&vtV< f%#VH- 0fEE264HH	IIj)F"34F#T4V4CJ 	#AvtV<KK	s   ),F 	G(GGtc                 Z    t        | j                        dk(  xr | j                  d   dk  S )zM
        True if this is a small constant attr that will be inlined.
        r,   r      )r   shape)r  s    r_   can_inline_constantz!GraphLowering.can_inline_constant  s(    
 177|q 4QWWQZ1_4r`   r^   c                    t        | j                  |      }t        |t        j                  j
                        rt        j                  ||      S t        |t        j                  j                        r*|| j                  |<   d| j                  |<   t        ||      S t        |t        j                        sJ t        j                  j                   st        j"                  st%        |      r| j'                  ||      S t)               5  |j*                  dk(  r8t-        |j/                         |j0                  |j2                        cd d d        S | j5                  |      r[t6        j9                  dt;        |             ddlm}  ||jA                         |j0                  |j2                        cd d d        S 	 d d d        | j'                  ||      S # 1 sw Y   xY w)N)r   graph_moduler   r^   zInlining constant: %s r,   )r  )ra   r   )!r   r(  r{   rd   r   r   r.   Subgraph_CScriptObjectr   r  rA   r   r-   r  r  always_keep_tensor_constantsrI   r  r)   r  r9   itemra   r   r  rY  rZ  r  loweringr  tolist)r   r   rY   rZ   r  r  s         r_   get_attrzGraphLowering.get_attr  sw    "$++v6eUXX112;;F??eUXX223/4D$$V,*,D'"6511%...<<22(/++E6::] 	V{{b 

ekk5<<H	V 	V ''.		2CK@,ellnEKKU	V 	V /	V ''v66	V 	Vs   >GA"GG%c                     t         r]   AssertionErrorr   r   rY   rZ   s       r_   call_modulezGraphLowering.call_module-      r`   c                     t         r]   r  r  s       r_   call_methodzGraphLowering.call_method0  r  r`   c                 d   t         |   |||      }t        |t        t        f      s|f}t        |t        t        f      sJ t        |             t        d |D              sJ |       t        j                  j                  j                  d   }t        |t        t        f      s|f}|D cg c]!  }t        j                  j                  |      # }}g }t        |      t        |      k(  sJ t        ||      D ]}  \  }}	t        |t        j                   t        j"                  f      s|j%                  |       B|j%                  | j'                  ||	j(                  d   j+                                       || _        | j.                  j1                         D ]+  \  }
}t        |t         t2        j4                  f      sJ dt        |              t        |t               sK|j7                          t        |t               sJ |j8                  }t        |t        j:                        sJ |}|j8                  }t        |t<              r|j?                         |
k7  st        j@                  jC                  || jD                  |
          	 | j,                  jG                  |      }| jD                  |
   | j,                  |<   . | jK                          tL        jO                  d| jP                  | jR                  | jR                         y d       y c c}w # tH        $ r Y w xY w)Nc              3     K   | ]  }t        |t        t        j                  t	        d       t        j
                  t        j                  t        j                  j                  j                  t        t        j                  f        y wr]   )r{   r@   r.   r9   r  r  r|   r   logicboolalgBooleanr  EffectfulKernel)r:  r  s     r_   r=  z'GraphLowering.output.<locals>.<genexpr>;  sg      
  KKJ%%JJKK''//&&	
s   B	Br   r7  z'Unsupported inductor graph input type: zGForce channels last inputs for %d conv for the current graph with id %dr@  )*r   outputr{   r  r  r  rX  rS   r   r  rY   r.   ExternKernelrealize_inputr   zipr@   BaseViewr  try_match_insignificant_stridesr   r   r   r   r  r|   r   r  r  r?   r<   get_nameMutationLayoutSHOULDREMOVErealize_intor   r  
ValueErrorfinalizerY  rZ  r   r   )r   r   rY   rZ   resultfx_node_argsr  result_correct_stridesrfx_noder   r  value_storage_boxindr0  s                 r_   r  zGraphLowering.output3  s    f5&5$-0YF&5$-0>$v,>0 
 
 
 	 	 
" ww++003,6(?L<BCq"////2CC!#< CK///fl3 
	JAwa",,!<=&--a0 '--887<<.557
	 4,,224 	KD%	5::. G8eFG  eY/MMOeY///JJEeR]]333 %JJEe[1U^^5E5M--::455d;,,223DEC.2.H.H.ND&&s+)	0 			U''!]]6DMM	
 =?	
W DN " s   /&L7L""	L/.L/c                 F    | j                   D ]  }|j                           y r]   )r   decide_layout)r   r  s     r_   r  zGraphLowering.finalize  s!    << 	 C	 r`   r   c              #   b   K   | j                   }	 || _         d  || _         y # || _         w xY wwr]   )r  )r   r   olds      r_   set_current_nodezGraphLowering.set_current_node  s1     	$ $D #DDs   /# /	,/r  meta_strides_inp.c           	      t    t         j                  j                  j                  |      sJ |D cg c]4  }t	        |t         j
                        r|j                  j                  n|6 }}t         fdt        ||j                               D              r|S dt        t        t        t        f      dt        t        t        t        f      dt        t        t        t        f      dt        f fd} ||j!                         ||j                               s|S t         j                  j                  j#                  |      \  }}t%        |j&                        }t)        |j!                               D ]*  \  }	} j*                  j-                  |d      s#||	   ||	<   , t         j                  j                  j/                  |j0                  |j2                  |j4                  ||j6                        }
t        j8                  t         j                  j                  j;                  ||
            S c c}w )a  
        Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
        dimensions - size 0 or 1 - will be updated.

        If there are real stride differences (NHWC vs NCHW) then the input will be returned.
        c              3   \   K   | ]#  \  }}j                   j                  ||       % y wr]   )r   statically_known_equals)r:  s1s2r   s      r_   r=  z@GraphLowering.try_match_insignificant_strides.<locals>.<genexpr>  s.      
B MM11"b9
s   ),r  meta_stridestensor_stridesr[   c                     t        | ||      D ]A  \  }}}j                  j                  |d      r$j                  j                  ||      rA y y)Nr,   FT)r  r   statically_known_leqr*  )r  r-  r.  dimr+  r,  r   s         r_   significant_strides_equalzPGraphLowering.try_match_insignificant_strides.<locals>.significant_strides_equal  sV    
  #5,G !R==55c1=}}<<RD ! r`   r,   )rd   r]  r.   is_storage_and_layoutr{   r   r   r   rX  r  
get_strider   r   r   r  rl   get_sizeas_storage_and_layoutr  r   r   r   r0  r:   r   ra   r   offsetr@   ReinterpretView)r   r  r'  sr-  r2  storage
old_layout
new_strider   
new_layouts   `          r_   r  z-GraphLowering.try_match_insignificant_strides  s    !!77??? HX
BC:a6AFFKKA=
 
  
lF,=,=,?@
 
 M	E$),-	"5s#34	 %U49%56	 		 )OO|V->->-@
 M#oo00FFvN*++,
foo/0 	0DAq}}11!Q7 ,Q
1	0 __''33OO

 ||EOO..>>w
STTU
s   9H5r  old_args
old_kwargsnew_args
new_kwargsc                     t        |j                  t        j                  j                        sJ t        |      t        |      k(  sJ t        |      t        |      k(  sJ dt        j                  j                  dt        j                  dt        j                  ddf fd}|j                  j                  }t        t        ||            D ]!  \  }\  }	}
|j                  |   } |||	|
       # |j                  D ci c]  }|j                  | }}|j                         D ]  }||   }	||   }
||   } |||	|
        yc c}w )ax  Propagate mutations on new_args/new_kwargs back to old_args/old_kwargs.

        Assumes we may have cloned old_args/old_kwargs into new_args/new_kwargs
        and then called fx_node(*new_args, **new_kwargs).

        If fx_node mutates any of new_args/new_kwargs, and they are different from
        old_args/old_kwargs, then we need to update the original tensor.
        
schema_argold_argnew_argr[   Nc                     ||u ry | j                   Y| j                   j                  rBj                  t        j                  j
                  j                  j                  ||fi        y y y r]   )
alias_infois_writer   rd   rS  r   copy_rP  )rC  rD  rE  r   s      r_   maybe_propagatez9GraphLowering.propagate_mutation.<locals>.maybe_propagate  sa     '!$$0Z5J5J5S5S ""599>>#7#7#?#?'7ASUWX 6T0r`   )r{   r   rd   r   r   r   r  Argumentr.   r  _schemar   r  	argumentsr   r   )r   r  r>  r?  r@  rA  rJ  schemar<  rD  rE  rC  argschema_kwargskeys   `              r_   propagate_mutationz GraphLowering.propagate_mutation  sD     '..%***?*?@@@8}H---:#j/111	Y))	Y46II	YHJ			Y	Y '''0Xx1H'I 	:#C#'7))#.JJ9	: 392B2BC33CC??$ 	:C oG oG&s+JJ9		: Ds   Er;  c                 $#   )*+ dt         dd ffd}t         j                        *t         j                        +h}j                  dk(  }|r# j                        \  }}|t        ||      z  }t        j                  j                  |      5   j                        5  t        j                        5  j                  dk(  rNj                  t        j                  ur2t              r' |d        t!        j                  d      i }n"j                  dk(  ret#        j                        x}rN |d       }	}
 |g|i |\  }} j%                  j                  ||      } j'                  |	|
||       nt)        j                        r |d	       t+        j,                  d
   t.        j0                  t.        j2                  t.        j4                  f      r$j,                  d
   j6                  j8                  }n't:        , y        }n |d       t:        , y        }t.        j>                  j@                  jB                  jD                  t.        j>                  j@                  jF                  jD                  t.        j>                  j@                  jH                  jD                  t.        j>                  j@                  jJ                  jD                  t.        j>                  j@                  jL                  jD                  g)tO        d jP                  D              }tO        )fdjP                  D              }j,                  jS                  dd      rt+        |tT              r|jW                          j,                  d
   jY                         }t/        jZ                  j\                  j^                  | }ta        |d      r|jc                         |k7  r7|s5t        jd                  |      }t        jf                  ji                  ||      }|rDt+        |tT              r4t+        |jj                  t        jl                        r|jW                          |s|r,t+        j,                  d
   t.        jn                        rj,                  d
   jY                         }t        |      rtp        jr                  xs jt                   jv                  vxr | }t.        jx                  j{                  j,                  d
         }t        t}        |            dkD  }|s|rt        |j                               dk(  rd j                  v rVjt                   jv                  vr>|s<t        j                  j                  |j                         t.        j                        }|st        |      r؉j,                  d
   j                         s$t+        |jj                  t        jl                        r6t        jf                  ji                  |t        jd                  |      |      }na|D cg c]4  }t+        |t.        j0                        r|j6                  j8                  n|6 }}t        jf                  j                  |||      }t        t        jP                              }|dkD  rt+        |tT              rjP                  D ]  }|j                  t        v rn|j                          t.        j>                  j@                  j                  jD                  t.        j>                  j@                  j                  jD                  t.        j>                  j@                  j                  jD                  g}g } j                  s=|j                  t.        j>                  j@                  j                  jD                         t.        j                  j                  r|t.        j>                  j                  j                  jD                  t.        j>                  j                  j                  j                  t.        j>                  j@                  j                  jD                  t.        j>                  j                  j                  jD                  t.        j>                  j                  j                  j                  t.        j>                  j                  j                  j                  t.        j>                  j                  j                  j                  gz  }|t.        j>                  j                  j                  jD                  t.        j>                  j                  j                  j                  t.        j>                  j                  j                  j                  t.        j>                  j                  j                  jD                  t.        j>                  j                  j                  jD                  t.        j>                  j                  j                  j                  gz  }t.        j                  j                  r2|t.        j>                  j                  j                  jD                  gz  }|j                  |v rPt        jf                  ji                  |t        jd                  j,                  d
   jY                               d      }|j                  |v rd|j                  d   u rSt        jf                  ji                  |t        jd                  t        j,                  d
   j                                    }|j                  dk(  st+        |jj                  jj                  t        t        f      s|jW                           |j                  t        jP                               t+        |tT              r |j                         r|j                          t+        |tT              r`t+        |jj                  t              rF|jj                  jj                  }t+        |t              r |j                         r|jW                          d d d        d d d        d d d        t+        tT              rt+        |jj                  t        j                        rt+        |jj                  jj                  t        j                        r|jj                  jj                  _j        nt+        |jj                  jj                  t        j                        rX|jj                  jj                  _j        t+        |jj                  jj                  t        j                        r^t+        |jj                  jj                  jj                  t        j                        r&|jj                  jj                  jj                  _j        nt+        |jj                  jj                  t        j                        r|jj                  jj                  j                  sct+        |jj                  jj                  j                  d   t        j                        r(|jj                  jj                  j                  d   _j         j                  |       t               } j                  *d  D ]  }||j                         z  }  j                  +d  D ]  }||j                         z  } dt         f*+ fd}j                  dk7  r=t        j                  j                  j                  }dt        dt         dd f fd}|D ]T  } j                  j                  |g       } |j                  |   }!|j                         j                  |!      s~dt        dt        fd}" |"|!j                        r% |||!j                  k\  | d|!j                           |"|!j                        r% |||!j                  k  | d|!j                          | D ]  }#t}        |#j8                        }$|$ j                  z
  }%|%r>t        |%t               }& j                  j                  |&g       j                  |#       g ||#j8                  |#j8                           W  xj                  |z  c_~        t        t        j                  j                  j                  j,                  jS                  di             }'t        d  |'j                         D              }(||(k\  s'J d!| d|( d"j                          d# |               |S c c}w # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w)$Nmsgr[   c                 Z    t         j                  dt        j                        |        y )Nzlowering %s %s)rY  rZ  r   format_node)rT  r;  s    r_   rZ  z%GraphLowering.run_node.<locals>.debug  s    II&
1==(A3Gr`   r   rC   F)add_to_fallback_setlayout_constraintsr   r7  r   c              3   :   K   | ]  }|j                   d k(    yw)r  N)r   )r:  r|  s     r_   r=  z)GraphLowering.run_node.<locals>.<genexpr>7  s     DDDGGx/Ds   c              3   :   K   | ]  }|j                   v   y wr]   )r   )r:  r|  as_strided_opss     r_   r=  z)GraphLowering.run_node.<locals>.<genexpr>8  s      *26~-*s   inductor_realize_to_stridesr4  r      )allow_paddingr,   Tr  c                     g } j                   d  D ](  }| j                  d|j                          d| d       * j                  d  D ](  }| j                  d|j                          d| d       * dj	                  |       S )Nzunbacked_symbol_defs=z in:

z***
)r   r  get_unbacked_symbol_defsr   r   )r  r  r   buffer_watermarkoperation_watermarkr   s      r_   format_new_defsz/GraphLowering.run_node.<locals>.format_new_defs  s    A||$4$56 +C,H,H,J+K6RUQVVXY oo&9&:; +B,G,G,I+J&QSPTTVW <<?"r`   r  r   c                 z    t        j                  | |      }j                  |d       j                  |       y )NTr  )r.   AssertScalarr  r  )r   rT  	assert_opr   s      r_   make_assertz+GraphLowering.run_node.<locals>.make_assert  s4    OOD#6	$$Y$>''	2r`   r9  c                 Z    | t         t          fv ry	 t        |        y# t        $ r Y yw xY w)NFT)r+   r  	TypeError)r9  s    r_   is_convertiblez.GraphLowering.run_node.<locals>.is_convertible"  s5    & 11#()F#'( )#()s    	**z >= z <= )rQ  unbacked_bindingsc              3      K   | ]8  }t         j                  j                  j                  j	                  ||       : y wr]   )rS   r`  r   unbacked_renamingsr   )r:  r9  s     r_   r=  z)GraphLowering.run_node.<locals>.<genexpr>L  s5      3 %%88<<QB3s   >A zfailed z (inductor >= fx)
fx node is: z
new operations are:

)r  r   r   r   r   fetch_args_kwargs_from_envrM   r.   r  current_originsr&  rS   r   r  r  rD   rC   rG   r   rR  r   r{   r   rd   r   SymFloatSymBoolr   r   r   run_noderS  r   
as_stridedrP  as_strided_as_strided_scatterresize	resize_asr   ry  r   r@   r  r   r]  r  any_is_symbolicr   r4  get_stride_orderr  require_stride_orderr  r  r   r-   r   r   r   _prims_commonis_non_overlapping_and_denser!   r5  r  FlexibleLayout stride_ordered_for_memory_formatchannels_last_is_viewrequire_exact_stridesr*   rH   realize_hintr   mm_int_mmr   r  r   r  _has_mkldnnrU  _linear_pointwisebinarymkldnn_rnn_layeronednnqlinear_pointwiser  binary_tensor_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiseqconv2d_pointwisehas_mklmkl_mkl_linearrY   r   r  r=   r>   
mark_reusehas_exceeded_max_readsr?   has_large_inner_fnLoopsorigin_nodeBufferr  MultiOutputindicesinputsr  ra  r   r   r   r   r   popvar_to_range _default_unspecified_value_rangeissubsetrl   lowerupperr   r   
setdefaultr#   r   rV  )-r   r;  rZ  originsis_call_functionrY   rZ   r  rX  r>  r?  	is_outputis_input_for_as_stridedr  sym_stridesstride_orderr^  denseunbacked_symbols_in_stridesr9  	num_usersr|  need_fixed_layoutneed_fixed_channels_last_layoutcurrnew_unbacked_defsr  r   rd  r   rh  i0rasvrrk  rafvsmissingi1rl  renamed_unbacked_bindingsr[  rb  rc  r0  s-   ``                                       @@@r_   rs  zGraphLowering.run_node  sO   	Hs 	Ht 	H t||,!$//2#44?2::1=LD&~dF33GYY&&w/ Q	)1F1F2
 Q	)
Q	) 'HHH$4$449!<()N)!((N# (&>qxx&HH"H*+#
1!EdEfEf++AHHdFC ''8ZvN * '(FF5MELL%..%--#P VVE]//44F"W-a0Fb	)!, 		))11		**22		1199		%%--		((00N DAGGDDI&) *:;''* '# vvzz7?J	E  &&-..0#oo33CCWM5((*g5'#%#6#6w#?L__AA&,WFvy1v{{BKK8  4*uu||; &&-..0w<**UaffD<U<U.U%655 " "//LLuE 1':;a? 0 8! 12a7!@!@@FF$*C*CC 7"$"3"3"T"T"OO-u/B/B# 73w< 66%=113z"KK8 &(__%I%I & " 3 3G <.; &J &F *1'$% 0:!U\\/JPQ Q'G ' &(__%J%J &} &K &F Jqww/0I1}FI!>GG ;-D{{&;;++- "IINN??GG!IINN--55!IINN22::-)
 ;=7#-44UYY^^5O5O5W5WX 88//- %		 0 0 B B J J %		 0 0 B B I I %		 ? ? G G %		 0 0 B B J J %		 0 0 B B I I %		 0 0 B B I I %		 0 0 B B P P2 - < %		 0 0 G G O O %		 0 0 G G N N %		 0 0 H H O O %		 0 0 Q Q Y Y %		 0 0 B B J J %		 0 0 B B I I@ ;  %xx// 1eiimm6O6O6W6W5X X 1;;*;;%'__%I%I & " 3 3AFF5M4H4H4J K.2 &J &F !KK+JJ !TYYq\ 1%'__%I%I & " 3 3$B166%=CVCV$W!"&F ww(*%fkk&6&6I8NO"NN,w;-| !!#agg,/ &),1N1N1P ##% &),FKK1T{{''dI...0(cQ	) Q	) Q	)v fi(ZR]]-S&++**BHH5/0  ,FKK,,bii8/0  ,fkk..0A0ABzKK$$))288H 9:FKK$$))5 v{{//@"KK,,44!&++"2"2"9"9!"<biiHAB((//2>v&6@l<< 0 12 	@C!=!=!??	@//"5"67 	?B!<!<!>>	?
	# 
	# 44= * ((22I3$ 3S 3T 3
 ( ;((,,R4++B/ AACLLRP)$ )4 ) &bhh/#B"((Nrd$rxxj4IJ%bhh/#B"((Nrd$rxxj4IJ ;B/8C!D$?$??G c2**55b"=DDRH#BGGy:;);: ''+<<' 9  **AFFJJ7JB,O!" )3 3*//13 )% %(AA +,D1J0K L }}/ 0**9*;)<>A 'oQ	) Q	) Q	) Q	) Q	) Q	)sr   AF'AE8=VAE+	9AE&
S9AE+=*AE+)CAE+ AE8AFE&AE+E+AE5E0AE8E8AF	E=AFFAFc                    t         j                  rt        d      t        j                  dvrt        dt        j                         | j
                  j                         D ]  }d }t        |t              r|j                         }nXt        |t        j                  t        j                  t        j                  j                  j                  f      rt!        |      }t#        || j$                        rt        d|        y )NzC++ codegen is disabled)linuxdarwinwin32zUnsupported platform zUnsupported input dtype )r-   disable_cpp_codegenr5   sysplatformr   rb  r{   r@   r  r|   r}   r   r~   r   r   r   rx   rb   )r   r  ra   s      r_   !validate_can_generate_cpp_wrapperz/GraphLowering.validate_can_generate_cpp_wrapperX  s    %%()BCC<<;;(+@)OPP&&--/ 
	QEE%+)ejj%**2D2D2L2LM 6e<1%C,/Gw-OPP
	Qr`   c                    d| j                   v | _        | j                  r| j                          | j                   j	                         }|j                  d       |j                  d       t        |      dk  s%J dj                  dj                  |                   t        |      dk(  }|rdn|j                         }t        |      | _        t        || j                        }|J d| d	        |       | _        | j                  r_| j                  j                  j                  | j                  _        | j                  j                  j                   | j                  _        y y )
Nrb   r8  r   r,   zDoes not support mixing {}+r   zDevice z not supported)r   rb   r   r  r   discardr   formatr   r  r2   r  r3   r	  r   _names_itersrc_to_kernel)r   r   only_cpudevice_typewrapper_code_gen_clss        r_   init_wrapper_codezGraphLowering.init_wrapper_codek  sL   d///	224((--/U#V$< A% 	
'C'J'JHH\"(
 	
% |$)'e\-=-=-?1+>=)) 
 $/V7;-~1VV/02 -1,=,=,J,J,V,VD)!!..<< +	 r`   c                    d| j                   v rd| _        t        j                  ddi      5  | j	                         j
                  }ddd       t        j                  j                  s<dt        t        j                  t        j                  t        j                  f   dt        t        t        t        j                  f   fd}t        j                  j                   j#                         }|t%        t&        j(                  t*              sy|j,                  r|j,                  j/                          |j0                  D cg c]  }|| }}t3        j4                  |t&        j(                        D cg c]
  } ||       }}nMt%        t&        j(                  t*              r| j6                  nt&        j(                  D cg c]
  } ||       }}| j8                  rd	d
lm} t?        | j@                        D 	
cg c]2  \  }	}
|
| j8                  v rt%        ||	   t        j                        r|	4 }}	}
|D ]/  }	||	   }t%        |t        j                        sJ  ||      ||	<   ~1 t        jB                  jD                  jG                         5   |       ddd       ~d| _        | jH                  j/                          | jJ                  j/                          | jL                  j/                          t&        jN                  jP                  jR                  j/                          t&        jN                  jP                  jT                  j/                          t        j                  ddi      5  | jW                         cddd       S | jW                         S # 1 sw Y   [xY wc c}w c c}w c c}w c c}
}	w # 1 sw Y   'xY w# 1 sw Y   yxY w)ad  
        For CPU, the cpp wrapper codegen is done in one pass.
        For GPU, the cpp wrapper codegen is done in two steps: JIT-compile the model with python
        wrapper code and run it to generate autotuned kernel binaries in the first pass; and then
        generate cpp wrapper code and compile it to a dynamic library in the second pass.
        rb   Fztriton.store_cubinTNr  r[   c                 .   | y t        | t        j                  t        j                  f      r| j                  j
                  S t        | t              rt        |       S t        | t        j                        sJ dt        t        |             z          | S )Nz&Unknown type when creating real inputs)r{   rd   r   rq  r   hintr   r   r   r  r  )r  s    r_   materializez;GraphLowering.codegen_with_cpp_wrapper.<locals>.materialize  s~     y##Aenn'EF vv{{*#Az2%ay()u||  SCc$q'lRS    !r`   r,   )clone_preserve_stridesztriton.autotune_at_compile_time),r   r   r-   patchcompile_to_modulecalltritonautotune_at_compile_timer   rd   r   rq  r   r  r\  _guardsTracingContexttry_getr{   rS   real_inputsrR   output_stridesclearparams_flat	itertoolschainr   r  
compile_fxr  r   r   r  r  r  r  r  r  r   r   precomputed_replacementsinv_precomputed_replacementscodegen)r   compiledr  tracing_contextparamr  r  r  r  r<  r   r  mutated_inps                r_   codegen_with_cpp_wrapperz&GraphLowering.codegen_with_cpp_wrapper  s5    T&&&$D 3T:; 9113889 ==99!U\\5>>5<<GH!3u||34!  #(--">">"F"F"H".zMM;8 '55'66<<> &5%@%@#! , #K # "+amm!L# $A#K #  *!--E !//!"# $A#K # &&B *343D3D)E*%C4#6#66&{3'7F *& *  2 ( '2#&6)+u||DDD+A++NC('( [[11HHJ *[)*  $D  &&(##))+##))+GG55;;=GG99??A@%HI &||~& & <<>!o9 98#
##*&* *& &s;   N"N/7N4N9 7N>	O8O"N,OOc                     ddl m} | j                           || j                        | _         t        j
                  j                  | j                  | j                   j                         | j                  j                  |        | j                   j                          t        j                  dt        j                  j                         | j                  j                  | j                         }| j                  j#                          |S )Nr,   	SchedulerzFFinished codegen for all nodes. The list of kernel names available: %s)r  r  r  r   rS   rZ  draw_orig_fx_graphr'  r   r	  push_codegened_graphr  rY  r   r/  generater   pop_codegened_graph)r   r  r  s      r_   r  zGraphLowering.codegen  s    ( "4??3	""4<<1E1EF..t4 		TGG,,	

 ""++D,=,=>--/r`   c                     ddl m} |j                  | _        |j                  | _        |j                  | _         || j
                        | _         | j                   j                          y)a  
        This is a more compact version of the `codegen()` above
        where we codegen this graph as a subgraph of some parent
        graph. The parent graph is passed as an argument: the
        intention is to inline codegening of the subgraph in
        the parent graph's wrapper code (including the generated
        kerenls). The wrapper code is not finalized (via `.generate()`
        call), as this will be done in the parent graph's `codegen()`.
        r,   r  N)r  r  r	  r  r   r   r  )r   parent_graphr  s      r_   codegen_subgraphzGraphLowering.codegen_subgraph  sP     	)(55&11'33"4??3 r`   c                     d}g }g }| j                   j                  D ]N  }|j                         }||z  }|j                  ||dz  f       |j                  ||j	                         f       P |||fS )Nr   r]  )r  r   get_read_write_buffers_sizesr  get_estimated_runtime)r   total_bytesnode_countsnode_runtimesr   	num_bytess         r_   count_byteszGraphLowering.count_bytes  s    
 NN(( 	GD99;I9$Ki1n56  $(B(B(D!EF		G K66r`   codec                      y r]   r^   r  s    r_   save_output_codezGraphLowering.save_output_code&  s     	r`   c                 j    t        ddd      5  | j                         cd d d        S # 1 sw Y   y xY w)NzGraphLowering.compile_to_modulecode_genF)
phase_namefwd_only)r   _compile_to_moduler  s    r_   r  zGraphLowering.compile_to_module+  s4    -*u
 	- **,	- 	- 	-s   )2c                 H   ddl m} | j                  r| j                         n| j	                         \  }t
        j                         t        j                  d       	 |D cg c]  \  }}||j                  f }}}|j                        \  }t        dfdfd       |j                  ||i | j                  | j                  	      }|| _        | _        || _        |j&                  J t)        |j&                         t*        j                  d
|j&                         t        j,                  d
|j&                         t.        j0                  r(t3        d|j&                   t4        j6                         t8        j                  j;                  |j&                         t8        j                  j=                  t>        j@                  jC                  |j&                        d   dz          |S c c}}w # t        $ r t        dfd        w xY w)Nr,   )PyCodeCacheOutput code: 
%sinductor_output_codec                      d iS )Nfilenamer^   )paths   r_   <lambda>z2GraphLowering._compile_to_module.<locals>.<lambda>G  s    T* r`   c                       S r]   r^   r  s   r_   r	  z2GraphLowering._compile_to_module.<locals>.<lambda>H      4 r`   )
payload_fnc                       S r]   r^   r  s   r_   r	  z2GraphLowering._compile_to_module.<locals>.<lambda>A  r  r`   )linemapattrszOutput code written to: %szCompiled module path: )filer   z.debug)"	codecacher  r   r  r  r   r  rV   rZ  stack_tracewriter   r  load_by_key_pathr   r   r!  r"  r#  __file__rX   rY  r  r-   benchmark_kernelprintr  stderrrS   output_coder   osr  splitext)	r   r  r  line_nor   rQ  modr  r  s	          @@r_   r  z GraphLowering._compile_to_module1  s   * 04/?/?D))+T\\^ 	g 	&&t,148	HOP}w!1!12PGP#))$/IC &*' **@T^^@t'?'?@	 + 
 $ ||'''%		.=93<<H""*3<<.9

K	CLL)	RWW%%cll3A6AB
K Q 	&'
 	s   $H )H H  H H!c                    | j                   rddlm} | j                  sJ d       | j	                         \  }}t        j                  d|       d }| j                  r1| j                  | j                        }t        j                  d|       |j                  | ||| j                        S | j                         j                  S )Nr,   )AotCodeCompilerz"AOT mode only supports C++ wrapperr  z#Serialized Extern Kernel Nodes: 
%s)rb   )r   r  r  r   r  rV   rZ  r
  r   compilerb   r  r  )r   r  r  r  serialized_extern_kernel_nodess        r_   compile_to_fnzGraphLowering.compile_to_fnb  s    ==2##I%II# 99;MD'!!"5t<-1*''151L1L,,2.  %%:2 #**d: +   ))+000r`   c                     | j                   D cg c]F  }t        |t        j                        s*t        |t        j                        s|j                         H c}S c c}w r]   )r   r{   r.   NoneAsConstantBufferShapeAsConstantBufferr  )r   r   s     r_   get_output_nameszGraphLowering.get_output_names{  sP     **
dB$;$;<tR%=%=> MMO
 	
 
s   AAc                     || j                   j                         v xrL | j                   |   j                         dk(  xr* | j                   |   j                         j                  dk(  xs || j
                  v S )Nr,   r8  )r   r   r  r  r  r   rs  s     r_   is_unspec_argzGraphLowering.is_unspec_arg  s     D%%**,, C!!$'113q8C!!$'22499UB3 T222		3r`   )NNNFFNNNFFNNNNr]   )r[   N)r  r   r[   N)d__name__
__module____qualname__r   r.   r  __annotations__rd   r   r   r   r  r   r   r|   r   r   r   r   r%   rl   r	   r  r   ExternKernelNoder   r   r]  r   r/   r3  staticmethodr   rt  rw  r*   r(   r  r  r  property_subclassesfake_tensorFakeTensorModer`  r@   r  r  r  ra   r  r  r  r  r  r  r  r
   r  r  r  r  r  r  objectr  r   r  r9   r  rA   r  r   r  r	  r  r  r   r&  r  r   r  rR  rs  r  r  r  r  r  rJ   r\  r  r  r   r  r  r"  r&  r(  __classcell__)r0  s   @r_   r   r     s%   		?"#,,#	uT#YT
*+U49d4j3H-II	J#J,,	tEJJejj!11	2 8<(,"&!:>%) "$7;$(26"%WFHH  WF !ell!34WF H%	WF
 3-WF WF WF 'tCI7WF TNWF !)d2../0#56!
WF WF WF %T#s(^4WF  SM!WF" /#WF$ sm%WF& 
'WFrMEOO..55v=>MIWM	M ak aD aT a aF  
HH  
 U\\*
 	

 

20D1A 0dA# A$ A
Dell Dt D 5,,88GG  	%bii/0	1$Qc QeBLL"))4K.L Q
8S 
8U[[ 
88S 8U39-= 8& & &R\\ c  FK bii d s tCy S 
 "))!4bii!?@	  
3 
5<< 
 'SM').v'	'T 37	
	
"*3-	
		
# 8N SV  99!&v98<S&[8I9	tY$	%9v-H -C -c3h -TW -^ 5u|| 5 5 5 7 7!&r 748f4E 7	xBKK@	A 7D# S # ( # S # ( L
L
!&vL
8<S&[8IL
	L
\  $UXX]] $ $9UbllBKK/09U  c5<<&7 8# =>9U 
r||R[[(	)	9Uv):): *): cN	):
 *): cN): 
):Vb%((-- bF bHQ&<d"%T%T	:J5K0K*L d"LsDsDy)9$::; (!&7	T%)3./0$u=NPU=U7V2WW
7  s t  -: -/J /b1s 12
$s) 
3# 3$ 3r`   r   )r*  r  loggingr  r  r  r  r  collectionsr   
contextlibr   typesr   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r|   r   rd   torch._loggingtorch.fxr   r   torch._decompr   torch._dynamo.utilsr   r   r   r   torch._prims_commonr   torch._subclasses.fake_tensorr   r   %torch.fx.experimental._backward_stater   torch.fx.experimental.sym_noder   r    %torch.fx.experimental.symbolic_shapesr!   r"   r#   r$   r%   r&   torch.fx.graphr'   torch.fx.noder(   torch.utils._mode_utilsr)   torch.utils._ordered_setr*   torch.utils._sympy.numbersr+   r   r-   r.   codegen.commonr/   r0   r1   r2   r3   r4   excr5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   r  rB   rC   rD   rE   rF   rG   rH   rI   r  rJ   r   rK   r  rL   rM   rN   rO   rP   rQ   virtualizedrR   rS   torch._higher_order_ops.effectsrT   codegen.wrapperrU   torch._inductor.codecacherV   	getLoggerr)  rY  _logginggetArtifactLoggerr~  rS  r   countr  r  torch._inductor.fb.utilsrX   rl   rx   ra   r   r   r  r  r  r   r   r   Interpreterr   r^   r`   r_   <module>rT     s       	 	 
  # %            , 4 7 > 4   ? L  !  / / -   
 
 
	 	 	 ) &  ( ;/ 5 g!00<Hyy~~*9??, 68s c d $%,, $d $t $25:: (5;;BW $  
	!
6588((+56B/B/$,T#t)_$=B/	B/JH3EHH(( H3r`   