
    sg                         d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZ d d	lmZ d
dlmZ d Zdej@                  jB                  fdZ"dej@                  jB                  dee#   fdZ$dej@                  jB                  dee#   fdZ%de&fdZ'deee#      fdZ(d Z) G d d      Z* ed e*              ddZ+y)    N)defaultdict)DictListOptional)config)aot_autograd)	boxed_nop)BoxedDeviceIndex'check_multiple_devices_or_any_cpu_nodesformat_default_skip_messageget_mutation_stack_traceget_placeholder_info#log_cudagraph_skip_and_bump_counter)	BoxedBoolcount_tangents%get_first_incompatible_cudagraph_nodenum_fw_fixed_argumentsoutput_node)StorageWeakRef   )register_backendc           	      t   d }t        t              }d}t               }| j                  D ]  }|j                  dk(  rkt	         ||j
                        t        j                        r;|t         ||j
                        j                                  j                  |       |dz  }~|j                  dk(  st        |j                  d      s|j                  j                  }t        |j                        D ]  \  }}|t!        |j"                        k  r|j"                  |   }	n2|j$                  |j&                  vrG|j&                  |j$                     }	d}
|j(                  r|j(                  j*                  rd}
|
s||t         ||	j
                        j                                  z  }  |S )	Nc                     d| v r| d   S | d   S )Nvalfake_result )metas    T/var/www/html/venv/lib/python3.12/site-packages/torch/_dynamo/backends/cudagraphs.pymeta_fkz%find_input_mutations.<locals>.meta_fk    s    #tmtE{Dm1DD    r   placeholderr   call_function_schemaFT)r   setnodesop
isinstancer   torchTensorr   _typed_storageaddhasattrtargetr#   	enumerate	argumentslenargsnamekwargs
alias_infois_write)gr   inputs	input_idxmutated_inputsnschemaiargargumentmut_args              r   find_input_mutationsr@      sh   E FIUNWW 44= '!&&/5<<8~gaffo&D&D&FGHLLYWNITT_$188Y/XX%%F#F$4$45 3s166{? vvayHxxqxx/  xx1H>>~~.."& #f&wx}}'='L'L'NO' N: r    gmc                     i }| j                   j                  D ]W  }|j                  j                  dd       }t	        |t
        j                        s:|j                  |vsI|||j                  <   Y |S )Nr   )graphr%   r   getr'   r(   r)   device)rA   device_node_mappingr:   ts       r   get_device_node_mappingrH   F   sd    =?XX^^ .FFJJud#a&188;N+N,-). r    	aot_modelreturnc                     t        | j                        t        t        |            z
  }|sy t	        | j                        }t        ||      S N)r@   rC   r$   ranger   r   )rI   	num_fixedmutation_indicesplaceholderss       r   3check_for_mutation_ignore_cuda_graph_managed_tensorrQ   O   sD     ,IOO<s5CS?TT'	8L#L2BCCr    c                     t         j                  st        | |      x}r|S t        t	        |             x}r|S t        |       x}rt        d|j                   d      S y )Nzincompatible op ())r   (cudagraph_backend_support_input_mutationrQ   r   rH   r   r   r2   )rI   rN   mut_skipskipnodes        r   check_for_skiprX   Z   sz    ::Jy
 
8 
 O6	* t  4Y??t?*->tyyk+KLLr    c                 v    t        t        t        |                   }|j                  dk(  sJ |j                  S )Ncuda)nextiterrH   typeindex)rA   rE   s     r   get_device_indexr_   l   s3    $.r234F;;&   <<r    c                    t        |       }t        |j                        dk(  sJ |j                  d   D cg c]>  }t        |t        j
                  j                  j                        r|j                  nd @ c}S c c}w )Nr   r   )	r   r0   r1   r'   r(   fxrW   Nodestack_trace)rA   outputr=   s      r   get_stack_tracesre   r   sl    _Fv{{q    ;;q> 'sEHHMM,>,>?T	I  s   AA=c                     ddl m t        d      t        d       dfd	}fd}t	        ||t        j                  |d      t        j                  j                  j                        } ||       S )	Nr   )cudagraphify_implTc                    t        | |      }t        t        
      t        |            }t        | |      x}r%t	        j
                  	       t        d|        |S j                  t        |               ||t        |      j                  ddt        |       t        | j                        t        | j                        	      }d|_        |S )Nzskipping cudagraphs due to Fdevice_indexis_backwardis_inferencestack_tracesrP   mutated_input_idxsT)r	   r   r0   rX   r   disabler   r$   r_   rM   valuere   r   rC   r@   _boxed_call)rI   
aot_inputsrl   interpfixedskip_msgoutboxed_device_indexrg   do_cudagraphsdynamo_inputss          r   forward_cudagraphsz&cudagraphs.<locals>.forward_cudagraphs   s    9j1&s='93z?K%i7787m,/-hZ8 M/	:;%L+11))4-ioo>3IOOD

 
r    c                     t         |      }
s S t               }t         |      x}rTt        d|       t        j
                  j                  j                  j                  d      J  fd}d|_	        |S  	||t        |      t               ddt               t         j                        t         j                        	      }d|_	        |S )Nzskipping cudagraphs due to %sF)create_if_none_existsc                 4    j                           |       S rL   )set_to_running_backward)r7   rI   managers    r   fnz3cudagraphs.<locals>.backward_cudagraphs.<locals>.fn   s    //1 ((r    Tri   )r	   r   rX   r   r(   	_inductorcudagraph_treesget_managerrp   rq   rM   r_   re   r   rC   r@   )rI   rr   rs   rt   ru   r   rv   r   rw   rg   rx   s   `      @r   backward_cudagraphsz'cudagraphs.<locals>.backward_cudagraphs   s    9j1y)%i7787//
 oo55AA"(( B G &&&) "BNI%L))4))4-ioo>3IOOD

 
r    )rl   )fw_compilerbw_compilerinference_compilerkeep_inference_input_mutations)F)torch._inductor.cudagraph_treesrg   r   r
   r   	functoolspartialr(   _dynamor   %cudagraph_backend_keep_input_mutation)dynamo_modelry   rz   r   aot_cudagraphsrw   rg   rx   s    `   @@@r   
cudagraphsr   {   so    AdOM)$/ 2$L "&'$,,-?dS',}}';';'a'a	N ,66r    c                   0    e Zd ZdZed        Zed        Zy)CudagraphsBackendr   c                      ddl m}   |         y )Nr   reset_cudagraph_trees)r   r   r   s    r   resetzCudagraphsBackend.reset   s    Ir    c                     t        | |      S rL   )r   )modelr7   s     r   __call__zCudagraphsBackend.__call__   s    %((r    N)__name__
__module____qualname__compiler_namestaticmethodr   r   r   r    r   r   r      s-     M   
 ) )r    r   r   )r2   compiler_fnc                   	 t        |t        t        f      sJ r$|D cg c]  }t        j                  |       c}nt        |      t        j
                  j                          t        j
                  j                         }|j                  t        j
                  j                                t        j
                  j                  |      5   | |  ddd       |j                          t        j
                  j                         j                  |       t        j
                  j                          t        j
                  j                         t        j
                  j                  |      5   |  	ddd       t        	t        t        f      s	f		fd}|S c c}w # 1 sw Y   xY w# 1 sw Y   >xY w)zBThis isn't registered as a backend, but is used in some benchmarksN)streamc                      t              t        |       k(  sJ r%t        |       D ]  \  }}|j                  |        j                          rD cg c]  }|j	                          c}S S c c}w rL   )r0   zipcopy_replayclone)	
new_inputsdstsrcxcopy_inputscopy_outputsrC   static_inputsstatic_outputss	       r   runzcudagraphs_inner.<locals>.run   sp    =!S_444z: S		#'56!AGGI66!! 7s   A4)r'   listtupler(   
zeros_likerZ   synchronizeStreamwait_streamcurrent_streamr   	CUDAGraphrC   )
r   r7   r   r   r   r   r   rC   r   r   s
     ``   @@@r   cudagraphs_innerr      sY   ftUm,,,6<=))!,=V 
JJZZ F
uzz0023			6	" v
	JJ++F3	JJ JJ  "E			%		/ /./ntUm4(*	" 	" JA > / /s   F1F6?G6F?G)TT),r   collectionsr   typingr   r   r   r(   torch._dynamor   torch._dynamo.backends.commonr    torch._dynamo.backends.debuggingr	   torch._inductor.cudagraph_utilsr
   r   r   r   r   r   torch._inductor.utilsr   r   r   r   r    torch.multiprocessing.reductionsr   registryr   r@   ra   GraphModulerH   strrQ   rX   intr_   re   r   r   r   r   r    r   <module>r      s     # ' '    6 6   < &$N 4 4 Dxx##Dc]Dehh22 (3- $C D#/ K7\) )  l0A0C D$r    