
    sgG)                       U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlZd dlZd dlmZ d dlmZ d dl m!Z! d d	l"m#Z# d d
l$m%Z%m&Z&m'Z' d dl(m)Z)m*Z*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z< ejz                  j}                  e?d      Z@d ZAej                   G d d             ZCej                   G d d             ZDej                   G d d             ZEej                   G d d             ZFeeCeDeEf   ZGi ZHeeIeFf   eJd<    G d d      ZKi ZLeeIeKf   eJd<    eMd      fdeIded eMd!eMfd"ZN G d# d$e
      ZOdeej                  eIf   fd%ZQd& ZRdeIfd'ZSddeId(eTfd)ZU ej                  d      d*        ZWd+eej                     fd,ZYdeId-eKfd.ZZdeIfd/Z[ ej                  d      d0        Z\ej                  ej                  ej                  ej                  iej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  ej                  fD  ci c]  } | |  c} Zjd1eId2eej                     fd3Zl G d4 d5      Zm G d6 d7e      Zn G d8 d9en      Zo G d: d;      Zpej                   G d< d=             Zq erdi d> eqej                  d? d@A      dB eqej                  dC dD dEF      dG eqej                  dH dI dJF      dK eqej                  dL dM dNF      dO eqej                  dP dQ dRF      dS eqej                  dT dU dSV      dW eqej                  dX dY dZF      d[ eqej                  d\ d] d^ d[_      d` eqej                  da d`A      db eqej                  dc dbA      dd eqej                  de dfA      dg eqej                  dh diA      dj eqej                  dk dl dm djn      do eqej                  dp dq drV      ds eqej                  dt du dvF      dw eqej                  dx dyA      dz eqej                  d{ d|A      d} eqej                  d~ d dF      d eqej                  d d dF      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d dA      d eqej                  d d¬A      ZteeIeqf   eJd<   depd2e8eI   fdńZu G dƄ de0      Zv G dȄ de2      Zw G dʄ de      Zx G d̄ dͫ      Zy G d΄ dϫ      Zz G dЄ dey      Z{ G d҄ dӫ      Z| G dԄ dի      Z} G dք d׫      Z~ G d؄ de}      Zej                   G dڄ d۫             Z ej                  d      d܄        Z G d݄ dޫ      Zyc c} w )    N)autoEnum)chain)	AnyCallableClassVarDictList
NamedTupleOptionalTupleUnion)Printer)ELEMENTWISE_TYPE_PROMOTION_KIND)_pytree)
OrderedSet)int_oo)free_symbol_is_typesymbol_is_typeSymT)bound_sympyValueRangeAnalysisValueRanges   )configmetrics)DeferredLineBasegenerate_assertIndentedBuffer	sympy_dot
sympy_subsunique)ops
OpsHandlerOpsValueReductionType	StoreModeVschedulec                 x    t         j                  t        j                        rt         j	                  d|        y y )NzData type propagation: %s)schedule_logisEnabledForloggingDEBUGdebug)msgs    Q/var/www/html/venv/lib/python3.12/site-packages/torch/_inductor/codegen/common.pydata_type_loggerr2   3   s*      /6< 0    c                   :    e Zd ZU dZej
                  ed<   eed<   y)WorkspaceArgzA temporary buffer used for a single kernel, then discarded.

    Not registered as a traditional buffer since there are no users,
    so it would be dead code eliminated.
    nbytes	zero_fillN)__name__
__module____qualname____doc__sympyExpr__annotations__bool r3   r1   r5   r5   8   s     JJOr3   r5   c                       e Zd ZU eed<   eed<   ej                  ed<    ej                  d      Z	ej                  ed<   dZee   ed<   y)	TensorArgnamebufferdtyper   offsetNalias_of)r8   r9   r:   strr>   torchrE   r<   IntegerrF   r=   rG   r   r@   r3   r1   rB   rB   D   s=    
IK;;&q)FEJJ)"Hhsm"r3   rB   c                   F    e Zd ZU eed<   ej                  ed<   ed        Zy)SizeArgrC   exprc                      y Nr@   selfs    r1   rG   zSizeArg.alias_ofR   s    r3   N)	r8   r9   r:   rH   r>   r<   r=   propertyrG   r@   r3   r1   rL   rL   M   s#    
I
** r3   rL   c                   <    e Zd ZU eed<   eed<    ed      Zeed<   y)DeviceCodegen
schedulingwrapper_codegenNcpp_wrapper_codegen)r8   r9   r:   r   r>   typerW   r@   r3   r1   rT   rT   W   s    O $T
*r3   rT   device_codegensc                   $    e Zd Zd Zd Zd Zd Zy)DeviceOpOverridesc                     t         rO   NotImplementedErrorrQ   rC   s     r1   import_get_raw_stream_asz*DeviceOpOverrides.import_get_raw_stream_asd       !!r3   c                     t         rO   r]   rQ   
device_idxs     r1   
set_devicezDeviceOpOverrides.set_deviceg   ra   r3   c                     t         rO   r]   rP   s    r1   synchronizezDeviceOpOverrides.synchronizej   ra   r3   c                     t         rO   r]   rc   s     r1   device_guardzDeviceOpOverrides.device_guardm   ra   r3   N)r8   r9   r:   r`   re   rg   ri   r@   r3   r1   r[   r[   c   s    """"r3   r[   device_op_overrides_dictdevicedevice_schedulingdevice_wrapper_codegendevice_cpp_wrapper_codegenc                 ,    t        |||      t        | <   y rO   )rT   rY   )rk   rl   rm   rn   s       r1   register_backend_for_devicerp      s     ,13MOFr3   c                       e Zd Z e       Z e       Z e       Z e       Z e       Z e       Z	 e       Z
 e       Z e       Z e       Zy)BackendFeatureN)r8   r9   r:   r   FOREACH	BUCKETIZEINPLACE_BUFFERSMASKED_SCATTER_WITH_INDEXSCANSORTTUPLE_REDUCTIONPREFER_STORE_LOOP_ORDERTRITON_TEMPLATESREDUCE_TO_SINGLE_ELEMENTr@   r3   r1   rr   rr      sL    fGIfO $6D6DfO"fv#vr3   rr   c                     t                t        | t        j                        r| j                  }n)t        | t
              sJ | }t        j                  |      } t        |      } |d       j                  |       S rO   )init_backend_registration
isinstancerI   rk   rX   rH   get_scheduling_for_deviceget_backend_features)rk   device_typerU   s      r1   r   r      sb    &%,,'kk&#&&&k**;7Jd0088r3   c                 @    t        |t              sJ |t        |       v S )zSee also V.graph.has_feature)r   rr   r   )rk   features     r1   has_backend_featurer      s#    g~...*6222r3   c                 <    | t         v rt         |    j                  S d S rO   )rY   rU   )rk   s    r1   r   r      s     17?1J?6"--TPTTr3   cpp_wrapperc                 Z    | t         v r#t         |    }|r|j                  S |j                  S y rO   )rY   rW   rV   )rk   r   wrapper_codegen_objs      r1   get_wrapper_codegen_for_devicer      s?     -<V-D   33	
 %44	
 r3   c                     ddl m}  ddlm} ddlm} ddlm} ddlm	} ddl
m} ddlm} t        d	      | |d
t        d	fd||       t        d      ||dt        dfd||       t        d      t        d||       t         j"                  j%                         }|dk7  rCt        |      7ddlm} 	  |d      }	 |d      }
 |d      }|	r|
r|rt        ||	|
|       y y y y y y # t*        $ r Y y w xY w)N   )CppScheduling)CppWrapperCpu)CppWrapperCuda)CUDACombinedScheduling)HalideScheduling)TritonScheduling)WrapperCodeGencpu)cpphalidec                  6     t         j                     | i |S rO   )r   cpu_backend)argskwargscpu_backendss     r1   <lambda>z+init_backend_registration.<locals>.<lambda>   s    $DL1C1C$Dd$Uf$U r3   cuda)tritonr   c                  6     t         j                     | i |S rO   )r   cuda_backend)r   r   cuda_backendss     r1   r   z+init_backend_registration.<locals>.<lambda>   s     $FM&2E2E$F$WPV$W r3   xpuprivateuseoner   )_get_custom_mod_func
Schedulingr   CppWrapperCodeGen)r   r   cpp_wrapper_cpur   cpp_wrapper_cudar   cuda_combined_schedulingr   r   r   r   r   wrapperr   r   rp   rI   _C_get_privateuse1_backend_name torch.utils.backend_registrationr   RuntimeError)r   r   r   r   r   r   r   private_backendr   rl   rV   rW   r   r   s               @@r1   r~   r~      s   ".0@((' '/,8HI#U		
 !(0#9EUV#W		
 !'/#E+;^Lhh<<>O?*%o6>I	 4\ B23CDO"67J"K _9L+#%#'	 :M_  ? 	+   		s   <,C. .	C:9C:indexc                 L    ddl m} g | t        ||j                  |            S )Nr   )FlexibleLayout)irr   r    contiguous_strides)r   
index_varssizesr   s       r1   index_prevent_reorderingr      s*    # UUTIj.*K*KE*RSTTr3   device_op_overridesc                     |t         | <   y rO   )rj   )rk   r   s     r1   register_device_op_overridesr      s    ':V$r3   c                     t        | t              sJ t        j                         sddlm} ddlm} | t        j                         v r	t        |    S y )Nr   )r   )r   rH   rj   keysr   r   r   )rk   r   xpu_op_overridess      r1   get_device_op_overridesr     sF    fc"""#((*-@)..00'// 1r3   c                       y)N)
isinfisnanlogical_notsignbitleltgegteqner@   r@   r3   r1   boolean_opsr     s    r3   op_namereturnc                    | t               v rt        j                  S | dv rd|v r|d   S |d   S | dv rt        j                  S | dv rt        j                  S | dk(  rd|v r|d   S |d   S | dk(  rd|v r|d   n|d   }t
        |   S | d	v r$|d   }t        j                  j                  |      S | d
k(  rd|v r|d   S |d   S y)zK
    Given op name and a list of input dtypes, deduce the output dtype
    )to_dtype
index_exprrE   )randrandn)	get_index	randint64	load_seed	reductionr   constant)loadstorestore_reductionto_dtype_bitcastN)	r   rI   r?   floatint64DTYPE_TO_COMPUTATION_DTYPEr(   graph	get_dtype)r   r   r   rE   buf_names        r1   deduce_output_dtype_by_namer   5  s    +-zz	  
 #*V"3vgAbA	  
 {{	  

 {{	K	")V"3vg@a@	J	#*f#4w$r()%00	  

 7ww  **	&	&")V"3vgAbAr3   c                   
   e Zd ZddZdej
                  j                  fdZdej
                  j                  fdZdej
                  j                  fdZ	dej
                  j                  fdZd	 Zed
        Zed        Zy)DataTypePropagationNc                     || _         d|j                  j                  i| _        |j                  j                         D ]  \  }}|j                  | j                  |<     y Nroot)body
root_blockr   graphs	subblocksitems)rQ   r   kvs       r1   __init__zDataTypePropagation.__init__a  sU    	DOO))B
 NN((* 	%DAqWWDKKN	%r3   nodec                    |j                   }|D cg c]9  }t        |t        j                  j                        s(|j
                  dk7  s8|; }}t        |      dk(  ry t        d |D              }|sy t        j                  t        j                  |D cg c])  }|j                  t        j                     j                  + c}      S c c}w c c}w )Nplaceholderr   c              3      K   | ]K  }t         j                  |j                  v xr) |j                  t         j                     j                  d u M y wrO   )OptimizationContextkeymetarE   ).0ns     r1   	<genexpr>zBDataTypePropagation.deduce_node_dtype_by_inputs.<locals>.<genexpr>q  sS      )
   ##qvv- B*../55TAB)
s   AA)all_input_nodesr   rI   fxNodeoplenall	functoolsreducepromote_typesr   r   r   rE   )rQ   r   inputsr   input_nodesall_input_nodes_propagateds         r1   deduce_node_dtype_by_inputsz/DataTypePropagation.deduce_node_dtype_by_inputsi  s    %%
Auxx}}!=!$$-BWA
 
 {q %( )
 !)
 &
"
 *<GHqQVV'++,22H
 	

  Is   )CCC.C
c                 b    | j                   |j                     }| j                  |      }|sJ |S rO   )r   targetpropagate_graph)rQ   r   	sub_graphrE   s       r1   deduce_node_dtype_by_subgraphz1DataTypePropagation.deduce_node_dtype_by_subgraph~  s0    KK,	$$Y/ur3   c                    |j                   dk(  ry |j                  dk(  rt        |j                        dk7  ry |j                  t        j
                  k(  r| j                  |j                  d         S t        |j                  t              sJ |j                  j                  d      r| j                  |      S t        |j                  g|j                  i |j                  x}	 |S | j                  |      S )Nr   outputr   r   masked_subblock)r   r	  r   r   operatorgetitemdeduce_node_dtyper   rH   
startswithr  r   r   r  )rQ   r   output_dtypes      r1   r  z%DataTypePropagation.deduce_node_dtype  s    77m#;;("s499~':;;(***))$))A,77$++s+++;;!!"3455d;; 8 ++ L
   //55r3   r   c                 n   |j                   sJ d }|j                   D ]  }t        j                  |j                  v r|j                  t        j                     }n
t               }| j	                  |      |_        ||j                  t        j                  <   |j                  dk(  s|j
                  } |S )Nr  )nodesr   r   r   r  rE   r	  )rQ   r   graph_dtyper   opt_ctxs        r1   r
  z#DataTypePropagation.propagate_graph  s    {{{ KK 		,D"&&$))3))$7$;$;<-/ 2248GM18DII)--.{{h&%mm		, r3   c                 @    | j                  | j                  d          y r   )r
  r   rP   s    r1   	propagatezDataTypePropagation.propagate  s    T[[01r3   c                 .     | |      j                         S rO   )r  )clsr   s     r1   propagate_loopbodyz&DataTypePropagation.propagate_loopbody  s    4y""$$r3   c                     ddl m} ddlm} t	        ||      sJ t	        |j
                  |      sJ t        j                  |j
                         y )Nr   )LoopBody)SchedulerNode)	loop_bodyr  	schedulerr   r   _bodyr   r  )r  r   r  r   s       r1   propagate_scheduler_nodez,DataTypePropagation.propagate_scheduler_node  s>    (-$...$**h///..tzz:r3   r   N)r8   r9   r:   r   rI   r   r   r  r  r  Graphr
  r  classmethodr  r$  r@   r3   r1   r   r   `  s    %
 
*%((-- 6ehhmm 66UXX^^ $2 % % ; ;r3   r   c                        e Zd Zed        Zd Zd Zd Zd Zd Z	d Z
d Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zdddef fdZ xZS )ExprPrinterc                     d }t        | t              sOt        j                  d| t        j                        s*t        j                  d| t        j                        s| dk(  r| S  ||       r| S d|  dS )Nc                     | d   dk7  st        |       dk  ryd}t        | dd        D ]3  \  }}|dk(  r|dz  }n
|dk(  r|dz  }|dk(  s!|t        |       dz
  k7  s3 y |dk(  sJ y)Nr   (r   Fr   )T)r   	enumerate)stringcountichars       r1   all_in_parensz(ExprPrinter.paren.<locals>.all_in_parens  s    ayC3v;?E$VABZ0 !43;QJES[QJEA:!s6{Q"6 ! A::r3   z^[a-z0-9_.]+$z^\([^)]*\)$ r,  r-  )r   CSEVariablerematch
IGNORECASE)r/  r3  s     r1   parenzExprPrinter.paren  sc    	 v{+xx(&"--@xx>|M M6(!}r3   c           	          d|j                    dj                  t        | j                  t        | j                  |j
                                    S )N )rel_opjoinmapr9  _printr   rQ   rM   s     r1   _print_RelationalzExprPrinter._print_Relational  s:    4;;-q!&&s4::s4;;		7R'STTr3   c           	          dj                  t        | j                  t        | j                  |j                                    S )N*r=  r>  r9  r?  r   r@  s     r1   
_print_MulzExprPrinter._print_Mul  s+    xxDJJDKK(CDEEr3   c           	          dj                  t        | j                  t        | j                  |j                                    S )Nz + rD  r@  s     r1   
_print_AddzExprPrinter._print_Add  +    zz#djj#dkk499*EFGGr3   c           	          dj                  t        | j                  t        | j                  |j                                    S N % rD  r@  s     r1   
_print_ModzExprPrinter._print_Mod  rH  r3   c                     |j                   \  }}| j                  | j                  |             d| j                  | j                  |             S Nz / r   r9  r?  rQ   rM   lhsrhss       r1   _print_FloatTrueDivzExprPrinter._print_FloatTrueDiv  F    99S**T[[-./s4::dkk#>N3O2PQQr3   c                 $    | j                  |      S rO   )_print_FloorDivr@  s     r1   _print_CleanDivzExprPrinter._print_CleanDiv  s    ##D))r3   c                 >    | j                  |j                  d         S Nr   )r?  r   r@  s     r1   _print_IdentityzExprPrinter._print_Identity  s    {{499Q<((r3   c           	          dj                  t        | j                  t        | j                  |j                                    S )Nz >= rD  r@  s     r1   _print_GreaterThanzExprPrinter._print_GreaterThan  s-     {{3tzz3t{{DII+FGHHr3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   zalign(r   r-  r   r   r?  r@  s     r1   _print_alignzExprPrinter._print_align  9    499~"""DIIaL12!44r3   c                     |j                   \  }}| j                  |      }|t        |      k(  sJ |       t        |      }|dk\  sJ |dkD  r$dj                  | j	                  |      g|z        S y)Nr   rC  1)r   r?  intr=  r9  rQ   rM   baseexps       r1   
_print_PowzExprPrinter._print_Pow  sp    II	c{{4 c#h###haxx788TZZ-.455r3   c                 0    t        dt        |              )Nz#_print_ToFloat not implemented for r^   rX   r@  s     r1   _print_ToFloatzExprPrinter._print_ToFloat  s    !$GT
|"TUUr3   c                 0    t        dt        |              )Nz$_print_Infinity not implemented for ri  r@  s     r1   _print_InfinityzExprPrinter._print_Infinity      !$Hd"UVVr3   c                 0    t        dt        |              )Nz,_print_NegativeInfinity not implemented for ri  r@  s     r1   _print_NegativeInfinityz#ExprPrinter._print_NegativeInfinity"  s    !:4:,G
 	
r3   c                 0    t        dt        |              )Nz$_print_FloorDiv not implemented for ri  r@  s     r1   rV  zExprPrinter._print_FloorDiv'  rm  r3   c                 0    t        dt        |              )Nz%_print_PythonMod not implemented for ri  r@  s     r1   _print_PythonModzExprPrinter._print_PythonMod*  s    !$I$t*"VWWr3   c                 0    t        dt        |              )Nz&_print_IntTrueDiv not implemented for ri  r@  s     r1   _print_IntTrueDivzExprPrinter._print_IntTrueDiv-      !$J4PT:,"WXXr3   c                 0    t        dt        |              )Nz(_print_PowByNatural not implemented for ri  r@  s     r1   _print_PowByNaturalzExprPrinter._print_PowByNatural0      !6tDzlC
 	
r3   c                 0    t        dt        |              )Nz$_print_FloatPow not implemented for ri  r@  s     r1   _print_FloatPowzExprPrinter._print_FloatPow5  rm  r3   c                 0    t        dt        |              )Nz&_print_TruncToInt not implemented for ri  r@  s     r1   _print_TruncToIntzExprPrinter._print_TruncToInt8  ru  r3   c                 0    t        dt        |              )Nz&_print_RoundToInt not implemented for ri  r@  s     r1   _print_RoundToIntzExprPrinter._print_RoundToInt;  ru  r3   c                 0    t        dt        |              )Nz(_print_RoundDecimal not implemented for ri  r@  s     r1   _print_RoundDecimalzExprPrinter._print_RoundDecimal>  rx  r3   c                 0    t        dt        |              )Nz(_print_TruncToFloat not implemented for ri  r@  s     r1   _print_TruncToFloatzExprPrinter._print_TruncToFloatH  rx  r3   T)simplifyr  c                    |r]t        |t        j                        rCt        t        j
                  d      r)t        j
                  j                  j                  |      }t        | %  |      S )Nsizevars)
r   r<   r=   hasattrr(   r   r  r  superdoprint)rQ   rM   r  	__class__s      r1   r  zExprPrinter.doprintM  sI    
44*9U77##,,T2Dwt$$r3   )r8   r9   r:   staticmethodr9  rA  rE  rG  rL  rS  rW  rZ  r\  r_  rg  rj  rl  ro  rV  rr  rt  rw  rz  r|  r~  r  r  r?   r  __classcell__r  s   @r1   r)  r)    s     6UFH
HR*)I5	 VW

WXY

WYY


 15 % % %r3   r)  c                       e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z y) PythonPrinterc                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   zfloat(r   r-  r^  r@  s     r1   rj  zPythonPrinter._print_ToFloatU  r`  r3   c                    |j                   \  }}}| j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }|dk7  r	d| d| d}| d| S )Nrb  r,   // r-  rK  r   r9  r  )rQ   rM   xdivmods        r1   _print_ModularIndexingz$PythonPrinter._print_ModularIndexingY  s    ii3JJt||A'jjc*+jjc*+#:A3d3%q!ACu~r3   c                      y)Nzmath.infr@   r@  s     r1   rl  zPythonPrinter._print_Infinityb  s    r3   c                      y)Nz	-math.infr@   r@  s     r1   ro  z%PythonPrinter._print_NegativeInfinitye  s    r3   c           	          dj                  t        | j                  t        | j                  |j                                    S rJ  rD  r@  s     r1   rr  zPythonPrinter._print_PythonModi  rH  r3   c                     |j                   \  }}| j                  | j                  |            }| j                  | j                  |            }d| d| dS )Nr,  r  r-  r  )rQ   rM   r  r  s       r1   rV  zPythonPrinter._print_FloorDivm  sQ    3JJt||A'jjc*+1#T#a  r3   c                     |j                   \  }}| j                  | j                  |             d| j                  | j                  |             S rN  rO  rP  s       r1   rt  zPythonPrinter._print_IntTrueDivu  rT  r3   c                 ,    d| j                  |       dS )Nz
math.sqrt(r-  )r?  r@  s     r1   _helper_sqrtzPythonPrinter._helper_sqrty  s    DKK-.a00r3   c                 >    | j                  |j                  d         S rY  )r  r   r@  s     r1   _print_OpaqueUnaryFn_sqrtz'PythonPrinter._print_OpaqueUnaryFn_sqrt|  s      1..r3   c                     |j                   \  }}| j                  | j                  |             d| j                  | j                  |             S Nz ** rO  rd  s       r1   rz  zPythonPrinter._print_FloatPow  F    II	c**T[[./0TZZC@P5Q4RSSr3   c                     |j                   \  }}| j                  | j                  |             d| j                  | j                  |             S r  rO  rd  s       r1   rw  z!PythonPrinter._print_PowByNatural  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS Nr   zmath.floor(r   r-  r^  r@  s     r1   _print_floorzPythonPrinter._print_floor  9    499~"""T[[167q99r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS r  r^  r@  s     r1   _print_FloorToIntzPythonPrinter._print_FloorToInt  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   zmath.trunc(r   r-  r^  r@  s     r1   r|  zPythonPrinter._print_TruncToInt  s9    499~"""T[[167q99r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS Nr   z
math.ceil(r   r-  r^  r@  s     r1   _print_ceilingzPythonPrinter._print_ceiling  9    499~"""DKK		!56a88r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS r  r^  r@  s     r1   _print_CeilToIntzPythonPrinter._print_CeilToInt  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   zabs(r   r-  r^  r@  s     r1   
_print_AbszPythonPrinter._print_Abs  s9    499~"""dkk$))A,/022r3   c                     t        |j                        dk\  sJ ddj                  t        | j                  |j                               dS )Nr   zmax(, r-  r   r   r=  r>  r?  r@  s     r1   
_print_MaxzPythonPrinter._print_Max  @    499~"""diiDKK ;<=Q??r3   c                     t        |j                        dk\  sJ ddj                  t        | j                  |j                               dS )Nr   zmin(r  r-  r  r@  s     r1   
_print_MinzPythonPrinter._print_Min  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z	math.cos(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_cosz&PythonPrinter._print_OpaqueUnaryFn_cos  9    499~"""4;;tyy|45Q77r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z
math.cosh(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_coshz'PythonPrinter._print_OpaqueUnaryFn_cosh  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z
math.acos(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_acosz'PythonPrinter._print_OpaqueUnaryFn_acos  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z	math.sin(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_sinz&PythonPrinter._print_OpaqueUnaryFn_sin  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z
math.sinh(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_sinhz'PythonPrinter._print_OpaqueUnaryFn_sinh  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z
math.asin(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_asinz'PythonPrinter._print_OpaqueUnaryFn_asin  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z	math.tan(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_tanz&PythonPrinter._print_OpaqueUnaryFn_tan  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z
math.tanh(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_tanhz'PythonPrinter._print_OpaqueUnaryFn_tanh  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   z
math.atan(r   r-  r^  r@  s     r1   _print_OpaqueUnaryFn_atanz'PythonPrinter._print_OpaqueUnaryFn_atan  r  r3   c                 z    t        |j                        dk(  sJ d| j                  |j                  d          dS )Nr   round(r   r-  r^  r@  s     r1   r~  zPythonPrinter._print_RoundToInt  r`  r3   c                     t        |j                        dk(  sJ |j                  \  }}t        |t        j                        sJ d| j                  |       d| dS )Nr   r  r  r-  )r   r   r   r<   rJ   r?  )rQ   rM   numberndigitss       r1   r  z!PythonPrinter._print_RoundDecimal  sX    499~"""))'5==111F+,Bwiq99r3   N)!r8   r9   r:   rj  r  rl  ro  rr  rV  rt  r  r  rz  rw  r  r  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r~  r  r@   r3   r1   r  r  T  s    5H!R1/T
T:::
993@@8998998995:r3   r  c                   T    e Zd Z fdZd Zed        Zed        Zed        Zed        Z	ed        Z
ed        Zed	        Zed
        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Z ed        Z!ed        Z"ed         Z#ed!        Z$ed"        Z%ed#        Z&ed$        Z'e(d%        Z) xZ*S )&OpOverridesc                 0    t         |           || _        y rO   )r  r   _parent)rQ   parentr  s     r1   r   zOpOverrides.__init__  s    r3   c                 .    t        | j                  |      S rO   )getattrr  )rQ   items     r1   __getattr__zOpOverrides.__getattr__  s    t||T**r3   c                     | S rO   r@   )values    r1   identityzOpOverrides.identity  s	     r3   c                     t        |       S rO   )repr)r  rE   s     r1   r   zOpOverrides.constant  s    E{r3   c                 r    t        j                  t        j                  dt        j                        |       S Nr   )r#   truedivr   rI   int32r  s    r1   
reciprocalzOpOverrides.reciprocal  s"    {{3<<5;;7;;r3   c                 .    t        j                  | |       S rO   )r#   mulr  s    r1   squarezOpOverrides.square  s    wwq!}r3   c                     t        j                  t        j                  dt        j                        t        j
                  |             S r  )r#   subr   rI   float32erfr  s    r1   erfczOpOverrides.erfc  s*    wws||Au}}5swwqzBBr3   c                     t        j                  t        j                  t        j                  |             t        j                  |             S rO   )r#   r  rf  r  r  r  s    r1   erfcxzOpOverrides.erfcx  s,    wwswwszz!}-sxx{;;r3   c                     t        j                  t        j                  |       t        j                  dt        j
                              S r  )r#   r  rf  r   rI   r  r  s    r1   expm1zOpOverrides.expm1  s*    wwswwqz3<<5==#ABBr3   c           	          t        j                  t        j                  |       t        j                  dt	        j                  d      z  t
        j                              S )Nr   
   r#   r  logr   mathrI   r  r  s    r1   log10zOpOverrides.log10   s7    wwswwqz3<<DHHRL0@%--#PQQr3   c           	          t        j                  t        j                  |       t        j                  dt	        j                  d      z  t
        j                              S )Nr   r   r  r  s    r1   log2zOpOverrides.log2  s6    wwswwqz3<<DHHQK#OPPr3   c           
          t        j                  t        j                  | t        j                  t	        j
                  d      t        j                                    S )Nr   )r#   rf  r  r   r  r  rI   r  r  s    r1   exp2zOpOverrides.exp2  s3    wwswwq#,,txx{EMM"JKLLr3   c           	          t        j                  t        j                  | t        j                  dt        j
                                    S r  )r#   r  addr   rI   r  r  s    r1   log1pzOpOverrides.log1p  s+    wwswwq#,,q%++">?@@r3   c                     t        j                  dt        j                        }t        j                  |t        j
                  |t        j                  t        j                  |                         S r  )r#   r   rI   r  r  r  rf  negr  ones     r1   sigmoidzOpOverrides.sigmoid  sC    ll1ekk*{{3SWWSWWQZ-@ ABBr3   c                     t        j                  dt        j                        }t        j                  |t        j
                  |t        j                  t        j                  |                         S r  )r#   r   rI   r  r  r  libdevice_expr   r  s     r1   libdevice_sigmoidzOpOverrides.libdevice_sigmoid  sE    ll1ekk*{{3S->->swwqz-J KLLr3   c                 r    t        j                  | t        j                  dt        j                              S rY  )r#   maximumr   rI   r  r  s    r1   reluzOpOverrides.relu  s"    {{1cll1ekk:;;r3   c                 ,    t        j                  |       S rO   )r#   absr  s    r1   libdevice_abszOpOverrides.libdevice_abs      wwqzr3   c                 ,    t        j                  |       S rO   )r#   sqrtr  s    r1   libdevice_sqrtzOpOverrides.libdevice_sqrt"  s    xx{r3   c                 ,    t        j                  |       S rO   )r#   cosr  s    r1   libdevice_coszOpOverrides.libdevice_cos&  r  r3   c                 ,    t        j                  |       S rO   )r#   sinr  s    r1   libdevice_sinzOpOverrides.libdevice_sin*  r  r3   c                 ,    t        j                  |       S rO   )r#   r  r  s    r1   libdevice_logzOpOverrides.libdevice_log.  r  r3   c                 ,    t        j                  |       S rO   )r#   rf  r  s    r1   r  zOpOverrides.libdevice_exp2  r  r3   c                 2    dt         j                  |        S )N~r)  r9  r  s    r1   bitwise_notzOpOverrides.bitwise_not6  s    ;$$Q'())r3   c                 2    t         j                  |        dS )Nz == 0r  )as    r1   r   zOpOverrides.logical_not:  s    ##A&'u--r3   c                 \    t         j                  |        dt         j                  |       S )Nz & r  r  ys     r1   bitwise_andzOpOverrides.bitwise_and>  +    ##A&'s;+<+<Q+?*@AAr3   c                 \    t         j                  |        dt         j                  |       S )Nz | r  r!  s     r1   
bitwise_orzOpOverrides.bitwise_orB  r$  r3   c                 \    t         j                  |        dt         j                  |       S )Nz ^ r  r!  s     r1   bitwise_xorzOpOverrides.bitwise_xorF  r$  r3   c                 \    t         j                  |        dt         j                  |       S )Nz << r  r!  s     r1   bitwise_left_shiftzOpOverrides.bitwise_left_shiftJ  +    ##A&'tK,=,=a,@+ABBr3   c                 \    t         j                  |        dt         j                  |       S )Nz >> r  r!  s     r1   bitwise_right_shiftzOpOverrides.bitwise_right_shiftN  r+  r3   c           	         t        j                  | |      }t        j                  t        j                  |t        j                  dt
        j                              t        j                  t        j                  |      t        j                  |                  }t        j                  |t        j                  ||      |      S rY  )
r#   r  and_r   r   rI   r  r   wherer  )r  brconds       r1   	remainderzOpOverrides.remainderR  sy    GGAqMxxFF1cll1ekk23FF3;;q>3;;q>2
 yyswwq!}a00r3   c                 T    t        j                  t        j                  |       |      S rO   )r#   r   truncr  rE   s     r1   trunc_to_intzOpOverrides.trunc_to_int[      ||CIIaL%00r3   c                 T    t        j                  t        j                  |       |      S rO   )r#   r   floorr7  s     r1   floor_to_intzOpOverrides.floor_to_int_  r9  r3   c                 T    t        j                  t        j                  |       |      S rO   )r#   r   ceilr7  s     r1   ceil_to_intzOpOverrides.ceil_to_intc  s    ||CHHQK//r3   c                 T    t        j                  t        j                  |       |      S rO   )r#   r   roundr7  s     r1   round_to_intzOpOverrides.round_to_intg  r9  r3   c                 .    t        j                  | |      S rO   )r#   r  )r  r1  s     r1   int_truedivzOpOverrides.int_truedivk  s    
 {{1a  r3   c                 T    t        j                  | t        j                  |            S rO   )r#   r   r<   rJ   )rC   rF   s     r1   r   zOpOverrides.load_seedr  s    xxemmF344r3   c                     |dv sJ |       t         j                         D ]*  \  }}t        ||      }|t        | |t	        |             , y )N>   r   cppvecr   )pointwise_overrides_datar   r  setattrr  )r  r	  funcnamedataimpls        r1   _initialize_pointwise_overridesz+OpOverrides._initialize_pointwise_overridesv  sW    44<f<46<<> 	7NHd4(D|C<#56		7r3   )+r8   r9   r:   r   r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r   r#  r&  r(  r*  r-  r4  r8  r<  r?  rB  rD  r   r'  rM  r  r  s   @r1   r  r    s   +     < <   C C < < C C R R Q Q M M A A C C M M < <             * * . . B B B B B B C C C C 1 1 1 1 1 1 0 0 1 1 ! ! 5 5 7 7r3   r  c                       e Zd ZU eed<   edef   ed<   dZeedef      ed<   dZeedef      ed<   e	j                  Ze	ed<   y)OverridesDatarC   .r   Nr   rG  type_promotion_kind)r8   r9   r:   rH   r>   r   r   r   rG  r   DEFAULTrP  r@   r3   r1   rO  rO    s\    
I	#s(	+/FHXc3h'(/+/FHXc3h'(/'// 8 r3   rO  airy_aic                     d|  dS )Nzairy_ai_forward(r-  r@   r  s    r1   r   r     s    (1- r3   special_airy_ai)rP  r   rC   	bessel_j0c                     d|  dS )Nzbessel_j0_forward(r-  r@   r  s    r1   r   r         *1#Q/ r3   c                     d|  dS )Nzlibdevice.j0(r-  r@   r  s    r1   r   r         =1- r3   special_bessel_j0)rP  r   r   rC   	bessel_j1c                     d|  dS )Nzbessel_j1_forward(r-  r@   r  s    r1   r   r     rW  r3   c                     d|  dS )Nzlibdevice.j1(r-  r@   r  s    r1   r   r     rY  r3   special_bessel_j1	bessel_y0c                     d|  dS )Nzbessel_y0_forward(r-  r@   r  s    r1   r   r     rW  r3   c                     d|  dS )Nzlibdevice.y0(r-  r@   r  s    r1   r   r     rY  r3   special_bessel_y0	bessel_y1c                     d|  dS )Nzbessel_y1_forward(r-  r@   r  s    r1   r   r     rW  r3   c                     d|  dS )Nzlibdevice.y1(r-  r@   r  s    r1   r   r     rY  r3   special_bessel_y1digammac                     d|  dS )Nzcalc_digamma(r-  r@   r  s    r1   r   r     s    aS* r3   c                     |  dS )Nz
.digamma()r@   r  s    r1   r   r     s    A3j) r3   )rP  r   rG  rC   r  c                     d|  dS )Nzcalc_erfcx(r-  r@   r  s    r1   r   r         A3a( r3   c                     d|  dS )Nzlibdevice.erfcx(r-  r@   r  s    r1   r   r     s    +A3a0 r3   special_erfcxfmac                     d|  d| d| dS )Nz	std::fma(r  r-  r@   r  r"  zs      r1   r   r     s    is"QCr!A6 r3   c                     d|  d| d| dS )Nzfmadd(r  r-  r@   rp  s      r1   r   r     s    s"QCr!A6 r3   c                     d|  d| d| dS )Nzlibdevice.fma(r  r-  r@   rp  s      r1   r   r     s    s"QCr!A> r3   )rP  r   rG  r   rC   igammac                     d|  d| dS Nzcalc_igamma(r  r-  r@   r!  s     r1   r   r         <s"QCq1 r3   igammacc                     d|  d| dS Nzcalc_igammac(r  r-  r@   r!  s     r1   r   r         =2aS2 r3   gammaincc                     d|  d| dS rv  r@   r!  s     r1   r   r     rw  r3   special_gammainc	gammainccc                     d|  d| dS rz  r@   r!  s     r1   r   r     r{  r3   special_gammaincci0c                     d|  dS )Nzcalc_i0(r-  r@   r  s    r1   r   r         1o r3   c                     d|  dS Nzlibdevice.cyl_bessel_i0(r-  r@   r  s    r1   r   r         3A3a8 r3   c                     |  dS )Nz.i0()r@   r  s    r1   r   r     s    A3e r3   )rP  r   r   rG  rC   i0ec                     d|  dS )Nz	calc_i0e(r-  r@   r  s    r1   r   r         	!A& r3   c                     |  dS )Nz.i0e()r@   r  s    r1   r   r     s    A3f r3   special_i0ei1c                     d|  dS )Nzcalc_i1(r-  r@   r  s    r1   r   r     r  r3   c                     d|  dS Nzlibdevice.cyl_bessel_i1(r-  r@   r  s    r1   r   r     r  r3   
special_i1i1ec                     d|  dS )Nz	calc_i1e(r-  r@   r  s    r1   r   r     r  r3   special_i1elog_ndtrc                     d|  dS )Nzcalc_log_ndtr(r-  r@   r  s    r1   r   r     s    qc+ r3   special_log_ndtrmodified_bessel_i0c                     d|  dS )Nzmodified_bessel_i0_forward(r-  r@   r  s    r1   r   r         3A3a8 r3   c                     d|  dS r  r@   r  s    r1   r   r     r  r3   special_modified_bessel_i0modified_bessel_i1c                     d|  dS )Nzmodified_bessel_i1_forward(r-  r@   r  s    r1   r   r     r  r3   c                     d|  dS r  r@   r  s    r1   r   r     r  r3   special_modified_bessel_i1modified_bessel_k0c                     d|  dS )Nzmodified_bessel_k0_forward(r-  r@   r  s    r1   r   r     r  r3   special_modified_bessel_k0modified_bessel_k1c                     d|  dS )Nzmodified_bessel_k1_forward(r-  r@   r  s    r1   r   r   	  r  r3   special_modified_bessel_k1ndtrc                     d|  dS )Nz
calc_ndtr(r-  r@   r  s    r1   r   r     s    
1#Q' r3   special_ndtrndtric                     d|  dS )Nzcalc_ndtri(r-  r@   r  s    r1   r   r     rk  r3   special_ndtri	polygammac                     d| d|  dS )Nzcalc_polygamma(r  r-  r@   r!  s     r1   r   r     s    ?1#Rs!4 r3   scaled_modified_bessel_k0c                     d|  dS )Nz"scaled_modified_bessel_k0_forward(r-  r@   r  s    r1   r   r          :1#Q? r3   !special_scaled_modified_bessel_k0scaled_modified_bessel_k1c                     d|  dS )Nz"scaled_modified_bessel_k1_forward(r-  r@   r  s    r1   r   r   %  r  r3   !special_scaled_modified_bessel_k1spherical_bessel_j0c                     d|  dS )Nzspherical_bessel_j0_forward(r-  r@   r  s    r1   r   r   +  s    4QCq9 r3   special_spherical_bessel_j0zetac                     d|  d| dS )Nzzeta(r  r-  r@   r!  s     r1   r   r   0  s    52aS* r3   special_zetachebyshev_polynomial_tc                     d|  d| dS )Nzchebyshev_polynomial_t_forward(r  r-  r@   r!  s     r1   r   r   5      :1#Rs!D r3   special_chebyshev_polynomial_tchebyshev_polynomial_uc                     d|  d| dS )Nzchebyshev_polynomial_u_forward(r  r-  r@   r!  s     r1   r   r   :  r  r3   special_chebyshev_polynomial_uchebyshev_polynomial_vc                     d|  d| dS )Nzchebyshev_polynomial_v_forward(r  r-  r@   r!  s     r1   r   r   ?  r  r3   special_chebyshev_polynomial_vchebyshev_polynomial_wc                     d|  d| dS )Nzchebyshev_polynomial_w_forward(r  r-  r@   r!  s     r1   r   r   D  r  r3   special_chebyshev_polynomial_wlegendre_polynomial_pc                     d|  d| dS )Nzlegendre_polynomial_p_forward(r  r-  r@   r!  s     r1   r   r   I      9!BqcC r3   special_legendre_polynomial_pshifted_chebyshev_polynomial_tc                     d|  d| dS )Nz'shifted_chebyshev_polynomial_t_forward(r  r-  r@   r!  s     r1   r   r   N      B1#Rs!L r3   &special_shifted_chebyshev_polynomial_tshifted_chebyshev_polynomial_uc                     d|  d| dS )Nz'shifted_chebyshev_polynomial_u_forward(r  r-  r@   r!  s     r1   r   r   S  r  r3   &special_shifted_chebyshev_polynomial_ushifted_chebyshev_polynomial_vc                     d|  d| dS )Nz'shifted_chebyshev_polynomial_v_forward(r  r-  r@   r!  s     r1   r   r   X  r  r3   &special_shifted_chebyshev_polynomial_vshifted_chebyshev_polynomial_wc                     d|  d| dS )Nz'shifted_chebyshev_polynomial_w_forward(r  r-  r@   r!  s     r1   r   r   ]  r  r3   &special_shifted_chebyshev_polynomial_whermite_polynomial_hc                     d|  d| dS )Nzhermite_polynomial_h_forward(r  r-  r@   r!  s     r1   r   r   b  s    82aSB r3   special_hermite_polynomial_hhermite_polynomial_hec                     d|  d| dS )Nzhermite_polynomial_he_forward(r  r-  r@   r!  s     r1   r   r   g  r  r3   special_hermite_polynomial_helaguerre_polynomial_lc                     d|  d| dS )Nzlaguerre_polynomial_l_forward(r  r-  r@   r!  s     r1   r   r   l  r  r3   special_laguerre_polynomial_lrH  hc                     | S rO   r@   r  s    r1   _typecheck_OpOverridesr  s  s    Hr3   c                   .     e Zd ZdZ fdZd Zd Z xZS )DeferredLinezHA line that can be 'unwritten' by adding name to V.graph.removed_buffersc                 V    t         |   |       || _        t        |t              rJ y rO   )r  r   rC   r   r   )rQ   rC   liner  s      r1   r   zDeferredLine.__init__z  s+    	d$45555r3   c                     t         fdt        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j
                  fD              r j                  S y )Nc              3   :   K   | ]  }j                   |v  y wrO   rC   )r   r  rQ   s     r1   r   z(DeferredLine.__call__.<locals>.<genexpr>  s"      
 IIQ
   )r   r(   r   removed_bufferskernelinplaced_to_remover  rP   s   `r1   __call__zDeferredLine.__call__  s_     
 ''((**++	
 
 99r3   c                 .    t        | j                  |      S rO   )r  rC   )rQ   r  s     r1   	_new_linezDeferredLine._new_line  s    DIIt,,r3   )r8   r9   r:   r;   r   r  r  r  r  s   @r1   r  r  w  s    R6
-r3   r  c                       e Zd ZddZy)BracesBufferc                 F     t         j                   fd       } |       S )Nc               3     K   t              D ](  } j                  d       xj                  dz  c_        * t               D ](  } xj                  dz  c_        j                  d       * d  t               D ](  } j                  d       xj                  dz  c_        * t              D ](  } xj                  dz  c_        j                  d       * y w)N{r   })range	writeline_indent)_rF   rQ   s    r1   ctxz BracesBuffer.indent.<locals>.ctx  s     6] "s#!" F7^ $!s#$ F7^ "s#!" 6] $!s#$s   C C#)
contextlibcontextmanager)rQ   rF   r   s   `` r1   indentzBracesBuffer.indent  s$    		"	"	$ 
#	$ ur3   N)r   )r8   r9   r:   r  r@   r3   r1   r  r    s    r3   r  c                   (    e Zd ZU eed<   ee   ed<   y)InplacedBuffer
inner_nameother_namesN)r8   r9   r:   rH   r>   r
   r@   r3   r1   r  r    s    Ocr3   r  c                       e Zd Zed        ZddZd Zd Zd Zd Z	d Z
d	ej                  d
efdZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)
KernelArgsc                 |    t        |t        t        j                  f      sJ ||vr|  t	        |       ||<   ||   S rO   )r   rH   r<   Symbolr   )prefixodictrC   s      r1   _lookupzKernelArgs._lookup  sC    $ell 3444u#HSZL1E$KT{r3   Nc                 R    i | _         i | _        i | _        |xs i | _        d | _        y rO   )input_buffersoutput_buffersinplace_buffersr  workspace_arg)rQ   r  s     r1   r   zKernelArgs.__init__  s-     ! B!r3   c                     dj                  dj                  t        t        | j                  | j
                  | j                  | j                  g                  S )NzKernelArgs({})r  )formatr=  r>  r  r  r  r  r  rP   s    r1   __repr__zKernelArgs.__repr__  sS    &&II**++,,	

 	
r3   c                 H    t        |t              xr |j                  d      S )NREMOVED)r   rH   r  r_   s     r1   _buffer_is_marked_removedz$KernelArgs._buffer_is_marked_removed  s    $$C)CCr3   c                    t         j                  j                  r4t         j                  j                  j                  j	                  ||      }|t         j                  j
                  vsJ |       || j                  v r| j                  |   S || j                  v r| j                  |   j                  S |j                  d      r| j                  d| j                  |      S | j                  d| j                  |      S )Nseedin_ptr)r(   r   r"  mutation_real_namegetr  r  r  r  r  r  r  r_   s     r1   inputzKernelArgs.input  s    7777$$77;;D$GD1772228D824&&&&&t,,4'''''-888??6"<<(:(:DAA||Hd&8&8$??r3   c                 l   t         j                  j                  r4t         j                  j                  j                  j	                  ||      }|t         j                  j
                  vsJ |       || j                  v r| j                  |   j                  S | j                  d| j                  |      S )Nout_ptr)
r(   r   r"  r  r  r  r  r  r  r  r_   s     r1   r  zKernelArgs.output  s    7777$$77;;D$GD1772228D824'''''-888||It':':DAAr3   c           	      f   || j                   vsJ || j                   v r:| j                   |   }|j                  j                  |       || j                   |<   y t        dt	        t        | j                   j                                      ||g      }|| j                   |<   || j                   |<   y )N
in_out_ptr)r  r  appendr  r   r"   values)rQ   
input_nameoutput_namebufs       r1   make_inplacezKernelArgs.make_inplace  s    $"6"6666---&&z2COO"";/03D  - S(<(<(C(C(E!FGHI[)C 03D  ,03D  -r3   r6   r7   c                     | j                   t        ||      | _         y| j                   j                  }|xs | j                   j                  }t        ||z   |      | _         d|fS )N)ws_ptrr   r+  )r  r5   r6   r7   )rQ   r6   r7   rF   s       r1   	workspacezKernelArgs.workspace  se    %!-fi!@D##**=!3!3!=!=	)&6/9Er3   c                     || j                   v r| j                   |   S | j                   j                         v r0 t        fd| j                   j                         D               | j                   |<   S )Nc              3   F   K   | ]  }|j                        sd   yw)r   N)r  )r   r   rC   s     r1   r   z)KernelArgs.seed_offset.<locals>.<genexpr>  s     U1!,,tBTQUs   !!)r  r%  sum)rQ   rC   r  s    ` r1   seed_offsetzKernelArgs.seed_offset  sr    DMM!==''4==''))&U(<(<(>UUVW   $er3   c                 x    t        |      dk(  rd| j                  d<   y| j                  d| j                  |      S )Nr  ks)rH   r  r  r_   s     r1   sizezKernelArgs.size  s6    t9$*DMM&!||D$--66r3   c                     t        | j                  j                         | j                  j                         | j                  j                               S rO   )r   r  r   r  r  rP   s    r1   
call_nameszKernelArgs.call_names  sA    ##%t':':'?'?'A4==CUCUCW
 	
r3   c                     |S rO   r@   )rQ   r(  rE   s      r1   wrap_ptr_argzKernelArgs.wrap_ptr_arg  s    
r3   c                     t        |      S rO   )rH   rQ   r3  s     r1   wrap_size_argzKernelArgs.wrap_size_arg  s    4yr3   c                    ddl m}m} g }g }g }t        | j                  j                               D ]  }| j                  |      r|j                  d   }|j                  }t        j                  j                  |      }	||	   }
|j                  |
 d|        |j                  | j                  ||	             |j                  |
 d        | j                  j                         D ]  \  }}|| j                  v rt        j                  j                  |      }	||	   }
|j                  d|
 d|        |j                  | j                  ||	             |j                  d|
 d        | j                   j                         D ]  \  }}|| j                  v s| j                  |      r&t        j                  j                  |      }	||	   }
|j                  |
 d|        |j                  | j                  ||	             |j                  |
 d        | j"                  j                         D ]  \  }}|j                  d| d|        |j                  | j%                  |             |j                  d|        t        j                  j&                  slt        j                  j&                  j)                  |        | j*                  J d       |||fS )	Nr   )DTYPE_TO_CPP
INDEX_TYPEr   z* rC  zconst r;  zWorkspace not supported on CPU )	cpp_utilsr<  r=  r"   r  r%  r  r  r  r(   r   r   r$  r7  r  r   r  r  r:  wrapper_codeensure_size_computedr  )rQ   r<  r=  	call_argsarg_defs	arg_typesinplacedouterinnerrE   	cpp_dtypes              r1   cpp_argdefszKernelArgs.cpp_argdefs  s   7		t33::<= 		.H--h7((,E''EGG%%e,E$U+IOOykE734T..ue<=	{!_-		. !..446 	4LE5,,,GG%%e,E$U+IOOfYKr%9:T..ue<=vi[23	4 !//557 	.LE5,,,0N0Nu0UGG%%e,E$U+IOOykE734T..ue<=	{!_-	. !MM//1 	ALE5OOfZL%9:T//67vj\23ww##$$99%@	A !!)L+LL)I--r3   c                    g }g }g }g }t        | j                  j                               D ]  }| j                  |      r|j	                  |j
                         |j	                  |j                  d          |j	                  t        j                  j                  |j                  d                |j	                  t        |j
                  |j                  d   t        j                  j                  |j                  d                       t        | j                  j                         | j                  j                               D ]  \  }}|| j                  v s| j                  |      r&|j	                  |       |j	                  |       |j	                  t        j                  j                  |             |j	                  t        ||t        j                  j                  |                    | j                  j                         D ]  \  }}|j	                  |       |j	                  |       |j	                  t!        |             |j	                  t#        ||             t        j                  j$                  sxt        j                  j$                  j'                  |        | j(                  =|j	                  d       |j	                  d       |j	                  | j(                         ||||fS )Nr   )rC   rD   rE   r+  r,  )r"   r  r%  r  r$  r  r  r(   r   r   rB   r   r  r   r  r  rX   rL   r?  r@  r  )rQ   rB  rA  rC  precompile_argsrD  rE  rF  s           r1   python_argdefszKernelArgs.python_argdefs;  s~    !	')	IKt33::<= 	H--h7OOH//0X11"56QWW..x/C/CB/GHI""!,,#//3''++H,@,@,DE	 "$$&(;(;(A(A(C
 	LE5 ,,,0N0Nu0UOOE"U#QWW..u56"" ''++E2	 !MM//1 	ALE5OOE"U#T%[)""75%#89ww##$$99%@	A )OOH%[)""4#5#56OY>>r3   c              #     K   t        | j                  j                               D ]  }| j                  |      r|j                  D ]  }|t
        j                  j                  v s|t
        j                  j                  v r<|| j                  v r| j                  |   |j                  f || j                  v sv| j                  |   |j                  f   y wrO   )r"   r  r%  r  r  r(   r   r  r  r  r  r  )rQ   rD  others      r1   aliaseszKernelArgs.aliasesi  s     t33::<= 	JH--h7!-- 	JQWW777 ; ;;D...,,U3X5H5HHHD///--e4h6I6III	J	Js   B:C="Cc                 ^      fd} || j                         xr  || j                        S )Nc                 8    | |vxs j                  ||          S rO   )r  )rC   buffersrQ   s     r1   _is_removedz*KernelArgs.is_removed.<locals>._is_removedy  s#    w&W$*H*HQU*WWr3   )r  r  )rQ   rC   rR  s   `  r1   
is_removedzKernelArgs.is_removedx  s7    	X 4!4!45 
+$&&;
 	
r3   c                 p   t               }t        | j                  j                               D ]2  }| j	                  |      r|j                  |j                  d          4 | j                  j                         D ]6  \  }}|| j                  v s| j	                  |      r&|j                  |       8 |S )Nr   )	r   r"   r  r%  r  r  r  r  r   )rQ   	live_outsrD  rE  rF  s        r1   live_output_bufferszKernelArgs.live_output_buffers  s    L	t33::<= 	4H--h7MM(..r23	4 !//557 	!LE5,,,0N0Nu0UMM% 	! r3   rO   )r8   r9   r:   r  r  r   r  r  r  r  r)  r<   r=   r?   r,  r0  r3  r5  r7  r:  rH  rK  rN  rS  rV  r@   r3   r1   r	  r	    s     "
D
@B4 

  t  7

'.R,?\J

r3   r	  c                   L    e Zd ZdZdee   fdZd ZdefdZ	de
fdZd Zd	 Zy
)r5  aD  A CSEVariable is just a name for an expression but it is useful to be able to annotate them on a backend dependent basis.
    To do so, the backends can simply overload `Kernel.create_cse_var`
    The "CSEVariable.update_on_args" method gives you a hook for annotations
    See example of TritonCSEVariable in triton.py
    boundsc                 R    t        |t              sJ || _        || _        d| _        y r  )r   r   rC   rX  	use_count)rQ   rC   rX  s      r1   r   zCSEVariable.__init__  s'    &+...	r3   c                     | j                   S rO   r  rP   s    r1   __str__zCSEVariable.__str__  s    yyr3   r   c                 ,    t        | j                        S rO   )hashrC   rP   s    r1   __hash__zCSEVariable.__hash__  s    DIIr3   c                 f    t        |      t        |       k(  xr |j                  | j                  k(  S rO   )rX   rC   )rQ   rM  s     r1   __eq__zCSEVariable.__eq__  s'    E{d4j(DUZZ499-DDr3   c                      y rO   r@   )rQ   rC   r   r   s       r1   update_on_argszCSEVariable.update_on_args  s    r3   c                 N    | j                   j                   d| j                  dS )Nr,  r-  )r  r8   rC   rP   s    r1   r  zCSEVariable.__repr__  s$    ..))*!DII=::r3   N)r8   r9   r:   r;   r   r   r   r\  rc  r_  r?   ra  rc  r  r@   r3   r1   r5  r5    sA    [%5 # Et E;r3   r5  c                       e Zd Zd Zd Zy)CppWrapperKernelArgsc                 J    ddl m} t        j                  r|S d||    d| dS )Nr   )r<  r,  z*)(z.data_ptr()))r>  r<  r   abi_compatible)rQ   r(  rE   r<  s       r1   r7  z!CppWrapperKernelArgs.wrap_ptr_arg  s0    +   J|E*+3se<@@r3   c                     | S rO   r@   r9  s     r1   r:  z"CppWrapperKernelArgs.wrap_size_arg  s
    r3   N)r8   r9   r:   r7  r:  r@   r3   r1   rf  rf    s    Ar3   rf  c            	           e Zd ZdZ	 	 	 	 	 	 	 ddZdee   fdZd Z e	j                         dddd	ed
eeeeef   de	e   defdZ e	j                         fde	e   defdZy)CSEz Common subexpression eliminationNc                     || _         || _        i | _        || _        |xs i | _        |xs i | _        |xs t        j                         | _        t               | _
        |xs i | _        y rO   )r  suffixcachename_prefixstore_cachereduction_cache	itertoolsr0  iter_buffer_idsr   invalidated_storesvarname_map)rQ   r  rm  ro  iter_buffersrp  rq  ru  s           r1   r   zCSE.__init__  sf     
&&,".4"+@y/@",,&,"r3   	keep_varsc                 0   t        | j                  j                               D ]2  \  }}||vs| j                  |= | j                  j	                  |       4 | j
                  j                         D ci c]  \  }}||v s|| c}}| _        y c c}}w rO   )listrp  r   rt  r  rn  )rQ   rw  rC   tmpr   r   s         r1   
invalidatezCSE.invalidate  s    d..4467 	2ID#)#$$T*''++D1	2 (,zz'7'7'9Ltq!Q)^adL
Ls   6BBc                     t        | j                  | j                  | j                  | j                  | j
                  | j                        S )N)r  rm  ro  rv  rp  ru  )rk  r  rm  ro  rs  rp  ru  rP   s    r1   clonez	CSE.clone  sA    ;;;;((--((((
 	
r3   T)rX  write
assignmentrD   rM   rX  r   c                2   t        |t              r|j                  }t        |t        t        t
        f      sJ t        |             |s|sJ t        |t              r7|j                  j                  |      |_        |xj                  dz  c_	        |S t        |t
              r|j                         n|}| j                  j                  |d       }|s| j                  |      }|| j                  |<   |rt        j                  j                   r+t        j                  j                   j#                  |d       t        |t
              rP|r |j%                  | j&                   | d       |j)                  |       |j%                  | j*                         |S |r | j&                   | d| | j*                   }n| | j*                   }|j%                  |       |S |j                  j                  |      |_        |xj                  dz  c_	        |S )Nr   T)	only_oncez =z = )r   r%   r  rH   r5  r   rX   rX  tightenrZ  getvaluern  r  newvarr(   r  current_nodecodegen_originating_infor  r  splicerm  )	rQ   rD   rM   rX  r~  r  	cache_keyvarr  s	            r1   generatezCSE.generate  s    dH%::D$k> BCOT$ZOC
""dK( ++--f5DKNNaNK'1$'GDMMOT	jjnnY-++f%C$'DJJy!88((HH))BB$ C  dN3!((DKK=R)@AMM$'$$T[[1 
 ""&++se3tfT[[MJ"&}5$$T*
 
 ++F3CJMMQM
r3   c                     | j                    t        | j                         }t        j                  j                  ||      }|| j                  |<   |S rO   )ro  nextrs  r(   r  create_cse_varru  )rQ   rX  var_namer  s       r1   r  z
CSE.newvar  sN    &&'T-A-A(B'CDhh%%h7%("
r3   )r4  r4  rz  NNNN)r8   r9   r:   r;   r   r   rH   r{  r}  r   unknownr   r   r5  r%   r   r  r  r@   r3   r1   rk  rk    s    * -(MJsO M	
  $7;#6#6#8.. Ch>?.
 C . 
.` 1D0C0C0E [- + r3   rk  c                   ,     e Zd Zd fdZd Zd Z xZS )CodeGenc                 T    t         |           t        j                         | _        y rO   )r  r   r  	ExitStack
exit_stack)rQ   r  s    r1   r   zCodeGen.__init__  s    $..0r3   c                 :    | j                   j                          | S rO   )r  	__enter__rP   s    r1   r  zCodeGen.__enter__!  s    !!#r3   c                 >    | j                   j                  |||       y rO   )r  __exit__)rQ   exc_typeexc_valexc_tbs       r1   r  zCodeGen.__exit__%  s      7F;r3   r%  )r8   r9   r:   r   r  r  r  r  s   @r1   r  r    s    1<r3   r  c                   ,    e Zd Zd Zd Zd Zd ZddZy)
ScopedDictc                      || _         i | _        y rO   )original_dict	new_items)rQ   r  s     r1   r   zScopedDict.__init__*  s    *r3   c                 Z    || j                   v r| j                   |   S | j                  |   S rO   r  r  rQ   r   s     r1   __getitem__zScopedDict.__getitem__.  s.    $.. >>#&&!!#&&r3   c                 "    || j                   |<   y rO   )r  )rQ   r   r  s      r1   __setitem__zScopedDict.__setitem__3  s    #sr3   c                 >    || j                   v xs || j                  v S rO   r  r  s     r1   __contains__zScopedDict.__contains__6  s!    dnn$At/A/A(AAr3   Nc                 t    || j                   v r| j                   |   S | j                  j                  ||      S rO   )r  r  r  )rQ   r   defaults      r1   r  zScopedDict.get9  s6    $.. >>#&&!!%%c733r3   rO   )r8   r9   r:   r   r  r  r  r  r@   r3   r1   r  r  )  s    '
$B4r3   r  c                       e Zd ZU dZdZdZeeee	   gee	   f      e
d<   dZde
d<   dZde
d<   d2 fd	Zej                  d        Zej                  d3d       Zd	ed
ej(                  defdZd	ed
ej(                  fdZd	ed
ej(                  defdZ	 d4d	ed
ej(                  dededdf
dZdej8                  dej8                  dedeeeedf   f   deeeedf   f   f
dZ deej8                  df   deeedf   eedf   geedf   f   deedf   deedf   fdZ!deej8                  df   deedf   de"de"deedf   f
dZ#d Z$deded ej(                  d!ej8                  d"e"defd#Z%e&defd$       Z'	 d4d%eeef   d&ee   d'ee   d(eeeef      def
d)Z(d*ej(                  d+ej(                  d&e"d'e"fd,Z)d
ej(                  defd-Z* fd.Z+ fd/Z,dej(                  fd0Z-d1 Z. xZ/S )5Kernelr4  N	overridesload_formatstore_formatc                    t         |           |rt        xj                  dz  c_        |xs
 t	               | _        t               | _        t               | _        t               | _	        d| _
        d| _        t        | j                  | j                        | _        t!               | _        t!               | _        d | _        d | _        d | _        d | _        t!               | _        t!               | _        i | _        d| _        d | _        y )Nr   r   )r  r   r   generated_kernel_countr	  r   r   loadscomputestoresnum_loadnum_reductionrk  newvar_prefixrm  cser   must_keep_buffersstore_buffer_names
_load_mask_load_otherr  node_to_boundsr  r  inplace_update_buffersmin_elem_per_threadkernel_name)rQ   r   increase_kernel_countr  s      r1   r   zKernel.__init__G  s     **a/*(JL	#%
%'$&D..<!+",, OS)|",,
 ')##$ r3   c              #      K   | j                   }|| _         |j                  j                         j                         | _        	 d  || _         y # || _         w xY wwrO   )r  r#  rX  
get_boundsr  )rQ   r   priors      r1   set_current_nodezKernel.set_current_nodeg  sO     !! "jj//1<<>	& %DDs   AAA A	AAc              #   B  K   d }||}| j                   }| j                  }| j                  }| j                  }|| _         || _        || _         ||      | _        	 d  || _         || _        || _        || _        y # || _         || _        || _        || _        w xY ww)Nc                     | j                         }t        | j                        |_        t        | j                        |_        t        | j                        |_        |S rO   )r}  r  rn  rq  rp  )r  new_cses     r1   	scope_csez&Kernel.swap_buffers.<locals>.scope_cses  sF    iikG&syy1GM&01D1D&EG#",S__"=GNr3   )r  r  r  r  )	rQ   lbcbsbr  r  r  r  r  s	            r1   swap_bufferszKernel.swap_buffersq  s     	 :B

,,hh
S>	DJ"DL DKDH DJ"DL DKDHs   ABA> !B>BBrC   r   r   c                     t         rO   r]   )rQ   rC   r   s      r1   r   zKernel.load  ra   r3   c                     | j                   }	 | j                  | _         | j                  ||      || _         S # || _         w xY w)z+A load the depends on an index we have read)r  r  r   )rQ   rC   r   r  s       r1   indirect_loadzKernel.indirect_load  s8    

	DJ99T5)DJDJs	   "8 	Ar  c                     t         rO   r]   )rQ   rC   r   r  s       r1   r   zKernel.store_reduction  ra   r3   modec                     t         rO   r]   )rQ   rC   r   r  r  s        r1   r   zKernel.store  
     "!r3   rE   	src_dtypereduction_type.c                     t         rO   r]   )rQ   rE   r  r  r  s        r1   r   zKernel.reduction  
     "!r3   dtypes
combine_fnr%  c                     t         rO   r]   )rQ   r  r  r%  s       r1   scanzKernel.scan  s
     "!r3   stable
descendingc                     t         rO   r]   )rQ   r  r%  r  r  s        r1   sortzKernel.sort  r  r3   c                     t         rO   r]   rP   s    r1   
var_rangeszKernel.var_ranges  ra   r3   offsets_nameoffsets_sizeindexing_dtyperightc                     t         )z3
        See [Note: Inductor bucketize op]
        r]   )rQ   r%  r  r  r  r  s         r1   	bucketizezKernel.bucketize  s
     "!r3   c                     t         rO   r]   rP   s    r1   assert_functionzKernel.assert_function  s    !!r3   r  loweruppermaskc           	      Z   t        |t              rt        |      }t        |t              sJ |t        |t              sJ |t        |t              sJ |r|rd| d| d| d| d	}| d| d| }n|r
| d| }|}n|sJ | d| }|}|r	d| d| d}| j                   d| d| dS )	Nr,  z <= z) & (z < r-  z) | ~(z, "index out of bounds: z"))r   r5  rH   r  )rQ   r  r  r  r  r3  
cond_prints          r1   indirect_assertzKernel.indirect_assert  s     c;'c(C#s###}
5# 666}
5# 666U ugT#eC5E7!<D!7$se3ug6JWD&DJL5U#eW%DJtfF4&*D&&'q.FzlRTUUr3   rM   r3  c                     t         rO   r]   )rQ   rM   r3  r  r  s        r1   check_boundszKernel.check_bounds  r  r3   c                     t         rO   r]   )rQ   r   s     r1   index_to_strzKernel.index_to_str  ra   r3   c                      G  fdd      ddt         t           fd}t                    j                  sJ  j	                  t        j                                j                  j                  t        j                                        j                  j                  t        j                                 S )Nc                      e Zd ZdW _         e       Zededede	f   f fd       Z
e fd       Ze	 	 d'de	deej                  ef   d	effd
       Zedej                  dej                  dedeffd       Zededej                  de	ffd       Zedede	ffd       Ze	 d(dedej                  de	deddf
 fd       Zededej                  de	f fd       Zedej2                  dej2                  dedee	ee	df   f   dee	ee	df   f   f
fd       Zedeej2                  df   deee	df   ee	df   gee	df   f   dee	df   dee	df   ffd       Zedeej2                  df   dee	df   ded edee	df   f
fd!       Zede	d"ed#ej                  d$ej2                  d%ede	ffd&       Zy))"Kernel.__enter__.<locals>.CSEProxyCSEProxyrC   r   .c                       fd}|S )Nc                        j                   g i  t               i } fd}t        j                  ||      S )Nc                     t         j                  j                  j                  t         j                  j                  |       }|j                         |S )NrX  )r(   r  r  r  r  rc  )r   csevarr   rX  r   rC   s     r1   do_csezMKernel.__enter__.<locals>.CSEProxy.__getattr__.<locals>.inner.<locals>.do_cse  sJ    !"!6!6HH,,a "7 " --dD&A%r3   )_bound_variabler  pytreetree_map)r   r   r  r  rX  r  rC   parent_handlers   ``  @r1   rF  z=Kernel.__enter__.<locals>.CSEProxy.__getattr__.<locals>.inner  sO    5X55dLTLVLF9GND94J6JE& "??6599r3   r@   )rC   rF  r  r  s   ` r1   r  z.Kernel.__enter__.<locals>.CSEProxy.__getattr__  s    : r3   c                    ddl m} t        t        j                  |      rt        j                         S t        j                  j                  j                  | k(  rVj                  Jt        j                  t              sJ j                  j                  t        j                               S t        j                  rmt        t         |       r]t#        fddD              rt        j                         S |rJ d }t%        t'        ||            } t)        j*                  |       | S t        j                         S )z
                If the variable comes from an FX node, we forward the bound we have already computed
                Else, if the variable when codegen'ing another op, we try to compute its bounds
                r   )TritonTemplateKernelc              3   :   K   | ]  }|j                   v   y wrO   )r	  )r   sfx_nodes     r1   r   zEKernel.__enter__.<locals>.CSEProxy._bound_variable.<locals>.<genexpr>!  s"       W^^+r  )set_indirectr   r  c                     t        | t              r| j                  S t        | t        j                        rt        |       S | S rO   )r   r5  rX  r<   r=   r   r  s    r1   arg_to_boundzHKernel.__enter__.<locals>.CSEProxy._bound_variable.<locals>.arg_to_bound-  s2    %a5#$88O'5::6#.q>1#$Hr3   )select_algorithmr  r   r(   r  r   r  interpreterr  r	  r  dictr  r   compute_all_boundsr  r   anyry  r>  r  vr_analysis)	rC   r   r   r  r
  
arg_boundsr  r  rQ   s	         @r1   r  z2Kernel.__enter__.<locals>.CSEProxy._bound_variable  s    Dahh(<=&..00--44>>T)d.A.A.M%d&9&94@@@..227K<O<O<QRR..7;Mt3T  !F   +2244  &%:% "&c,&=!>J>78#7#7>
KK&..00r3   r  r3  checkc                    t        |t              rt        j                  |      }t        |t        j                        sJ |       | j
                  j                  dk  ry|rt        j                  | t        j                  |t        j                              }| j
                  j                  dk\  r0t        j                  | d      }t        j                  |||       }n| }t        j                          }| j
                  t        j                          k7  rt        |t        j"                        r| j
                  t        t$         d      z  }t        |j                  |z   |j                  |z         }| j
                  j                  dk\  r"| j
                  t        dt$              z  }||z  }j&                  j)                  j*                  ||      } j-                  | ||      }	t/        |      re| j
                  j                  dk\   }
t        |t        j"                         xs | j
                  j                  |k   }j1                  |	||
|       |	S )Nr   r   r  )r   rc  r<   rJ   r=   rX  r  r#   r  r   rI   longr  r   r0  r   r  Numberr   r  r  r  indirect_indexingr   r  )r  r3  r  wrap_negstmr   
new_bounds
neg_boundspos	sympy_varassert_lowerassert_upperr  rQ   s               r1   r  z4Kernel.__enter__.<locals>.CSEProxy.indirect_indexing:  s    dC( ==.D!$

39T93 ::##a'!ggc3>>$

+KL::++q0!$QB"%))BS"9C! "-!4!4!6Jzz[%8%8%::zell@ &)ZZ+vgr2J%J
%0&,,t3Z5E5E5L&
 ::++q0"%**{1f/E"EC)3c)9J((++DLL#j+QC*<<S$N	"5)(+

(8(8A(=#>L'1$'E#E $

((4/JL %%i|\R  r3   rM   r  r  c                 ,    j                  | |||      S rO   )r  )rM   r3  r  r  rQ   s       r1   r  z/Kernel.__enter__.<locals>.CSEProxy.check_boundsm  s     ((tUEBBr3   r   c                    | j                   j                  v r)t        j                  j                  j                  |        t        |t        j                        rj                  | |      S j                   j                  }| |v r||    S j                  | |      }|j                  dk(  rxj                  dz  c_        |S r  )r  rt  r(   r  r  r  r   r   TMPr  rp  r   rZ  r  )rC   r   rp  outrQ   s       r1   r   z'Kernel.__enter__.<locals>.CSEProxy.loads  s    488666 HH..2248&udhh7--dE::"hh22;&&t,,iie, ==A%MMQ&M
r3   r  c                    |j                   j                  | <   j                  rg| t        j                  j
                  v rJj                  j                  |       }|j                         D ]  }|j                   j                  |<    y y y rO   )r  rp  r  r(   r   name_to_buffer
get_outputget_mutations)rC   r  r(  
other_namerQ   s       r1   _update_store_cachez6Kernel.__enter__.<locals>.CSEProxy._update_store_cache  s~    -2$$T*$$1G1G)G++66t<C&)&7&7&9 A
;@,,Z8A *H$r3   Nr  c                     j                   j                  |        |j                  | |       | t        j                  j
                  vrj                  | |||      S y )N)r  )r  r  r(  r(   r   r  r   )rC   r   r  r  r  rQ   s       r1   r   z(Kernel.__enter__.<locals>.CSEProxy.store  sZ     ''++D1<00u=qww666::dE5t:DDr3   c                     j                   j                  |        j                  | |       | t        j                  j
                  vrj                  | ||      S y rO   )r  r  r(  r(   r   r  r   )rC   r   r  r  rQ   s      r1   r   z2Kernel.__enter__.<locals>.CSEProxy.store_reduction  sS    ''++D1,,T59qww666//eUCC 7r3   rE   r  r  c                 V    xj                   dz  c_         j                  | |||      S r  )r  r   )rE   r  r  r  rQ   s       r1   r   z,Kernel.__enter__.<locals>.CSEProxy.reduction  s+     ""a'"~~eYNNr3   r  r  r%  c                 *    j                  | ||      S rO   )r  )r  r  r%  rQ   s      r1   r  z'Kernel.__enter__.<locals>.CSEProxy.scan  s     yyV<<r3   r  r  c                 ,    j                  | |||      S rO   )r  )r  r%  r  r  rQ   s       r1   r  z'Kernel.__enter__.<locals>.CSEProxy.sort  s     yyDDr3   r  r  r  r  c                 .    j                  | ||||      S )ay  
                [Note: Inductor bucketize op]

                Given values (tensor) and offsets_name (reference to the name of a 1D
                tensor), calculate the bucket that each value belongs to.

                e.g. for values [-1, 0, 1, 2, 3, 4, 5, 9], offsets [0, 4, 4, 8], right=True
                return =        [ 0, 1, 1, 1, 1, 3, 3, 4].

                When right == False, bucket i refers to range (offsets[i], offsets[i+1]].
                When right == True,  bucket i refers to range [offsets[i], offsets[i+1]).

                Offsets must be non-decreasing or the result is undefined.
                )r  )r%  r  r  r  r  rQ   s        r1   r  z,Kernel.__enter__.<locals>.CSEProxy.bucketize  s!    , ~~L, r3   )TTrO   ) r8   r9   r:   rC   r   r  r  rH   r   r5  r  r  r   r<   r=   rc  r?   r  r  r   r(  r'   r   r   rI   rE   r&   r   r   r  r  r  )r  r  rQ   s   r1   r  r    sd   "DI,.K# (33C*D  " (1 (1T  #	0! 0!EJJO,0! 0! 0!d CjjC(-

C;?CHLC C
 3 uzz k  " A# Ak A A TX	 	 "'**	 5@	 HQ	 	  	  Dc D%** D[ D D O{{O ;;O !.O [%S0@*AAB	O
 {E+s*:$;;<O O =ekk3./=$;+,eK4D.EF+s*+-= k3./= {C'(= = Eekk3./Ek3./E E !	E
 {C'(E E #! $jj !&	
   r3   r  r  r   c                     | S rO   r@   r  s    r1   _typecheck_CSEProxyz-Kernel.__enter__.<locals>._typecheck_CSEProxy  s    Hr3   )r$   r5  r  r  r  r(   get_ops_handlerr  enter_contextset_ops_handlerset_kernel_handler)rQ   r0  r  r  r  s   ` @@r1   r  zKernel.__enter__  s    ^	 ^	B	8 	
;0G 	 	~~~(9(9(;<%%a&7&7
&CD%%a&:&:4&@Ar3   c                     t         j                  j                  r(t         j                  j                  j                          t        |   |||       y)zj
        Note that V.graph.scheduler can be None when codegening triton template
        kernels.
        N)r(   r   r"  remove_kernel_local_buffersr  r  )rQ   r  r  r  r  s       r1   r  zKernel.__exit__  s9    
 77GG99;7F3r3   c           	         t        |t        t        f      r|D cg c]  }| j                  |       c}S t        j
                  j                  j                  |      }t        |j                  d       }|D ci c]W  }t        |t        j                  t        j                  t        j                  f      r|| j                  j!                  |      Y }}t#        ||      S c c}w c c}w )Nc                     | j                   S rO   r  )r  s    r1   r   z(Kernel.rename_indexing.<locals>.<lambda>  s
    !&& r3   )r   )r   ry  tuplerename_indexingr(   r   r  r  sortedfree_symbolsr   r   UNBACKED_INTSIZEPRECOMPUTED_SIZEr   r3  r!   )rQ   r   r  sorted_symbolsreplacementss        r1   r:  zKernel.rename_indexing  s     edE]+5:;D((+;;  ))%0 2 28HI $
%%II)) tyy~~a  
 
 %.. <
s   C%;AC*c                     t        |i |S rO   )r5  )rQ   r   r   s      r1   r  zKernel.create_cse_var  s    D+F++r3   )NT)NNrO   )0r8   r9   r:   r  rm  r  r   r   r$   r   r>   r  r  r   r  r  r  r  rH   r<   r=   r5  r   r  r   r'   r   rI   rE   r&   r   r   r   r  r?   r  r  r  rR   r  r  r  r  r  r  r:  r  r  r  s   @r1   r  r  ?  sq   MFHLIx*S/!2JsO!CDELKL$ @ & &  4" "UZZ "K "# ejj "C "

 "; " SW"" %

"3>"FO"	"
"{{" ;;" &	"
 [%S(8"99:" 
{E+s"233	4""ekk3&'" ;#$eK,<&=>kSVFV@WW
" k3&'" 
{C	 ""ekk3&'" k3&'" 	"
 " 
{C	 """" " jj	"
 " " 
" " " " 37V;#$V }V }	V
 u[#-./V 
V<"JJ"&+jj"9="FJ"
"%** " "kZ4/

 /*,r3   r  c                   X    e Zd ZU dZee   ed<   dZee	j                     ed<   dZ
eed<   y)r   r  r   NrE   r4  ops_name)r8   r9   r:   r   r   rH   r>   rE   r   rI   rD  r@   r3   r1   r   r     s-    "C#"#'E8EKK 'Hcr3   r   c                  b    	 dd l } | j                  | j                        S # t        $ r Y y w xY w)Nr   )	undefined)jinja2EnvironmentStrictUndefinedImportError)rG  s    r1   
jinja2_envrK    s?    !!,, " 
 	
  s   " 	..c                   f    e Zd ZdZeddedefd       Zed        Zed        Z	defdZ
d	 Zdd
Zy)KernelTemplatezg
    Base class for defining kernel templates.

    Children classes: TritonTemplate, CUDATemplate
    sourcenum_indentsc                     | j                  d      }t        |      dkD  r|dd  D cg c]  }d|z  |z  |z    c}|dd  dj                  |      S c c}w )NTr   r;  r4  )
splitlinesr   r=  )rN  rO  indents_spacinglinesr  s        r1   indent_except_firstz"KernelTemplate.indent_except_first!  sb    !!$'u:>INqrAE&4<E!"I wwu~s   Ac                     t               }|@t        j                  |j                  d<   ddlm}  G d d|      }	 |j                  |       S y # |$ r} ||      |d }~ww xY w)NrT  r   )TemplateSyntaxErrorc                   $     e Zd Z fdZd Z xZS )IKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxErrorc                     t         |   |j                  |j                  |j                  |j
                         || _        y rO   )r  r   messagelinenorC   filenameoriginal_error)rQ   r]  r  s     r1   r   zRKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__init__2  s>    G$&..&--&++&//	 +9D'r3   c                 F   d| j                    d}|d| j                   dz  }t        | j                  d      r| j                  j                  j                  d      }|dz  }t        d| j                   dz
        }t        t        |      | j                   dz         }t        ||      D ]s  }|| j                   dz
  k(  rN||dz    d	||    dz  }t        | j                  d
      s=|dd| j                  j                  dz
  z  z   dz   z  }c||dz    d||    dz  }u |S )NzError in template at line 
zError message: rN  z	Context:
r   r   r   z: --> columnz     r;  z^
z:     )r[  rZ  r  r]  rN  splitmaxminr   r  r`  )rQ   
error_inforS  startendr1  s         r1   r\  zQKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__str__;  sA   #=dkk]"!MJODLL>"DDJt22H= $ 3 3 : : @ @ F"l2
 #At{{Q 7!#e*dkkAo>!&uc!2 
IA DKK!O3 *1VE!H:R.H H
#*4+>+>#I$.(/*-1D1D1K1Ka1O*P)Q*/)0%&J !+1VE!H:R.H H

I &%r3   )r8   r9   r:   r   r\  r  r  s   @r1   DetailedTemplateSyntaxErrorrX  1  s    9&r3   rg  )rK  rM  rT  filtersrG  rV  from_string)rN  envrV  rg  es        r1   _template_from_stringz$KernelTemplate._template_from_string*  sk    l?1?1S1SCKK-.2&.A &><v..  ' <1!4!;<s   A A!	AA!c                 J     t         j                  j                   fd}|S )Nc                 Z    | j                         k(  rj                         S  |       S rO   )get_namer   )rC   _get_dtype_realfake_outs    r1   r   z1KernelTemplate._fake_get_dtype.<locals>.get_dtype[  s.    x((**))++"4((r3   )r(   r   r   )rq  r   rp  s   ` @r1   _fake_get_dtypezKernelTemplate._fake_get_dtypeW  s    ''++	)
 r3   rC   c                     || _         y rO   r  r_   s     r1   r   zKernelTemplate.__init__b  s	    	r3   c                 t    	 |j                   | j                  di |       y# t        $ r
}Y d}~yd}~ww xY w)z
        Maybe generates a new ChoiceCaller and appends it into existing choices.

        choices: A list of ChoiceCallers.
        kwargs: Additional kwargs to be passed to self.generate() to generate a new ChoiceCaller.
        Nr@   )r$  r  r^   )rQ   choicesr   rk  s       r1   maybe_append_choicez"KernelTemplate.maybe_append_choicee  s4    	NN=4==2623" 		s   !$ 	77c                     t         )zM
        Generates a ChoiceCaller instance from the given arguments.
        r]   )rQ   r   s     r1   r  zKernelTemplate.generater  s
    
 "!r3   N)   )r   ztorch._inductor.ir.ChoiceCaller)r8   r9   r:   r;   r  rH   rc  rT  rl  rr  r   rv  r  r@   r3   r1   rM  rM    sh     C c   * *X  S "r3   rM  )Fr@   )r  dataclassesr  rr  r-   r  r  r6  enumr   r   r   typingr   r   r   r	   r
   r   r   r   r   r<   sympy.printing.printerr   rI   torch.fxtorch._prims_commonr   torch.utilsr   r   torch.utils._ordered_setr   torch.utils._sympy.numbersr   torch.utils._sympy.symbolr   r   r   torch.utils._sympy.value_rangesr   r   r   r4  r   r   utilsr   r   r   r    r!   r"   virtualizedr#   r$   r%   r&   r'   r(   _logginggetArtifactLoggerr8   r+   r2   	dataclassr5   rB   rL   rT   KernelArgTyperY   rH   r>   r[   rj   rX   rp   rr   rk   r   r   r   r?   r   	lru_cacher~   r=   r   r   r   r   bfloat16r   float16r  float64int8int16r  r   uint8uint16uint32uint64r   rE   r   r   r)  r  r  rO  r  INT_TO_FLOATrH  r  r  r  r  r	  r5  rf  rk  r  r  r  r   rK  rM  )rE   s   0r1   <module>r     s	          	  
 
 
  *   ? ) / - O O X X   Q P ~~//*E=
    # # #    + + + lIw67,.c=() ." " :< $s$556 ;8 (,Dz	 ! !%	
&T 
&	9u||S'8!9 	93Uc U	3 	T 	 T2 2jUD$4 U; ;CT ;0C 0 T   
NNEKK	MM5;; JJMMMMJJKKKKKKKKLLLLLL
 	u ,(( ekk	(V_; _;HM%' M%`D:K D:Nc7 c7L 	 	 	 6: _6;HH-_6 ;HH/- 	_6 ;HH/- 	_6$ ;HH/- 	%_60 ;HH/- 	1_6< ;HH*)	=_6L ;HH(0	M_6X 	;HH66>	Y_6h ;HH1i_6r ;HH2s_6| ;HH1}_6F ;HH2 G_6P ;HH%8$Q_6^ 	;HH&%		__6j ;HH%8	k_6v 	;HH&	w_6@ ;HH+A_6L %;HH88)	M_6X %;HH88)	Y_6d %;HH8)e_6n %;HH8)o_6z 
;HH'
{_6D ;HH(E_6N ;HH4O_6\ ,;HH?0]_6f ,;HH?0g_6r &;HH9*s_6| 
;HH*
}_6F );HHD-G_6P );HHD-Q_6Z );HHD-[_6d );HHD-e_6n (;HHC,o_6x $1;HHL5$y_6B $1;HHL5$C_6L $1;HHL5$M_6V $1;HHL5$W_6` ';HHB+a_6j (;HHC,k_6t (;HHC,u_6 $sM12 _Fk jo -# -2> *Z 
c cL; ;:: ] ]@
< 
<4 4,D,W D,N    T ]" ]"q7s   
]4