
    sgM                     t   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlmZ d dlZd dlmZ g dZ G d d	e      Zd
 Zd Zd Zd Z G d d      Z G d d      Z edg d      Z G d de      Z G d de      Z G d de      Z G d d      Z d Z!dZ"dZ#d Z$d"d Z%	 	 	 	 	 	 	 	 	 d#d!Z&y)$    N)defaultdict
namedtuple)
attrgetter)AnyDictListOptionalTuple)
deprecated)
DeviceType)	EventListFormattedTimesMixinIntervalKernelFunctionEventFunctionEventAvgStringTableMemRecordsAccc                        e Zd ZdZ fdZd Zd Zd Zd Zd Z	e
d        Z	 	 	 	 	 	 	 dd	Zd
 Zd ZdedefdZddZd Z xZS )r   z'A list of Events (for pretty printing).c                     |j                  dd       }|j                  dd      }|j                  dd      }t        |   |i | || _        || _        d| _        || _        y )N
use_deviceprofile_memoryF
with_flops)popsuper__init___use_device_profile_memory_tree_built_with_flops)selfargskwargsr   r   r   	__class__s         O/var/www/html/venv/lib/python3.12/site-packages/torch/autograd/profiler_util.pyr   zEventList.__init__   sf    ZZd3
$4e<ZZe4
$)&)%- %    c                 r    | j                          | j                          | j                          d| _        y )NT)_populate_cpu_children_remove_dup_nodes_set_backward_stacktracesr   r!   s    r%   _build_treezEventList._build_tree'   s.    ##% &&(r&   c                 "    | j                         S N)tabler+   s    r%   __str__zEventList.__str__-   s    zz|r&   c                    	 t               }t        t        |             D ]  }| |   j                  | |   j                  j                  | |   j                  k(  s=t        | |   j                  j
                        dk(  sc| |   j
                  | |   j                  _        | |   j                  | |   j                  _        | |   j
                  D ]  }| |   j                  |_         |j                  |        t        |      dk(  ry t        |       D cg c]  \  }}||vs| }}}| j                          | j                  |       Rc c}}w )N   r   )setrangelen
cpu_parentnamecpu_childrenkernelsadd	enumerateclearextend)r!   	to_deleteidxchindevnew_evtss          r%   r)   zEventList._remove_dup_nodes0   s2   ISY' 
'I((4S	,,11T#Y^^CDI00==>!C8<S	8N8NDI((53793D3DDI((0"3i44 =(,S	(<(<=MM#&
' 9~"*3D/RwsBS	=QRHRJJLKK!#  Ss   E*Ec                    | D cg c]-  }|j                   s|j                  t        j                  k(  r|/ }}t	        |t        d            }t        j                  |d       }|D ]   \  }}t	        |d       }g }d}	|D ]  }
t        |      dkD  r|d   }|
j                  j                  |j                  j                  k\  s-|
j                  j                  |j                  j                  kD  r|j                          nC|j                  |
       |
j                  J d|
j                          |
j!                  |       nt        |      dkD  r|j#                  |
         yc c}w )	a4  Populate child events into each underlying FunctionEvent object.

        One event is a child of another if [s1, e1) is inside [s2, e2). Where
        s1 and e1 would be start and end of the child event's interval. And
        s2 and e2 start and end of the parent event's interval

        Example: In event list [[0, 10], [1, 3], [3, 4]] would have make [0, 10]
        be a parent of two other intervals.

        If for any reason two intervals intersect only partially, this function
        will not record a parent child relationship between then.
        thread)keyc                 2    | j                   | j                  fS r.   )rE   node_idevents    r%   <lambda>z2EventList._populate_cpu_children.<locals>.<lambda>a   s    u||U]]&C r&   c                 \    | j                   j                  | j                   j                   gS r.   )
time_rangestartendrI   s    r%   rK   z2EventList._populate_cpu_children.<locals>.<lambda>s   s&    5#3#3#9#9E<L<L<P<P;P"Q r&   r   Nz(There is already a CPU parent event for )is_asyncdevice_typer   CPUsortedr   	itertoolsgroupbyr5   rM   rN   rO   r   append_cpu_childr6   rF   set_cpu_parentappend)r!   evtsync_eventseventsthreads	thread_idthread_eventsthread_events_current_eventscur_endrJ   parents               r%   r(   z EventList._populate_cpu_childrenD   sx   $ 
<<COOz~~$E 
 

 8$
 ##C
  )0 	-$I}#QN 35NG' -.)A-+B/F((..&2C2C2G2GG ++//&2C2C2G2GG '**,//6!,,4REeii[QR4,,V4 .)A-  %%e,#-	-9
s   2E.c                 B   fdi }| D ]D  } |      |j                   |j                  |j                  f}||vs6|j                   ||<   F | D ]I  } |      }||j                  J |j                  |j                  f}||v r||   |_         Cg |_         K y )Nc                 P    | y | j                   dk(  r| S  | j                        S Nr2   )scoper6   )rZ   	bw_parents    r%   rh   z6EventList._set_backward_stacktraces.<locals>.bw_parent   s*    {a
 00r&   )stacksequence_nrrE   
fwd_thread)r!   
fwd_stacksrZ   tprh   s        @r%   r*   z#EventList._set_backward_stacktraces   s    	1 
 	.C~%#))*?__cjj1J&$'IIJqM		.  	#C#A}||///]]ALL1
? *1CI "CI	#r&   c                 &    t        d | D              S )Nc              3   4   K   | ]  }|j                     y wr.   )self_cpu_time_total.0rJ   s     r%   	<genexpr>z0EventList.self_cpu_time_total.<locals>.<genexpr>   s     ?5,,?   )sumr+   s    r%   rq   zEventList.self_cpu_time_total   s    ?$???r&   c                 T    t        | ||||||| j                  | j                  |
      S )a(  Print an EventList as a nicely formatted table.

        Args:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``xpu_time``,
                ``cpu_time_total``, ``cuda_time_total``, ``xpu_time_total``,
                ``cpu_memory_usage``, ``cuda_memory_usage``, ``xpu_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``,
                ``self_xpu_memory_usage``, ``count``.
            top_level_events_only(bool, optional): Boolean flag to determine the
                selection of events to display. If true, the profiler will only
                display events at top level like top-level invocation of python
                `lstm`, python `add` or other functions, nested events like low-level
                cpu/cuda/xpu ops events are omitted for profiler result readability.

        Returns:
            A string containing the table.
        )	sort_by	row_limitmax_src_column_widthmax_name_column_widthmax_shapes_column_widthheaderr   r   top_level_events_only)_build_tabler   r    )r!   rx   ry   rz   r{   r|   r}   r~   s           r%   r/   zEventList.table   s;    : !5"7$;//''"7
 	
r&   c                 b   ddl }| j                  sdn| j                  }t        |d      5 }g }d}|j                  d       | D ]  }|j                  |j                  dj                  |j                  |j                  j                  |j                  j                         |j                  s|j                  nd|j                   d|j                   d	             |j                  D ]P  }|j                  d
|j                   d|j                  j                   d|j                   d| d| d       |dz  }R  t        |       dkD  r=|j                  |j                         dz
  |j                          |j#                          |j                  d       ddd       y# 1 sw Y   yxY w)zExport an EventList as a Chrome tracing tools file.

        The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

        Args:
            path (str): Path where the trace will be written.
        r   Ncudaw[zc{{"name": "{}", "ph": "X", "ts": {}, "dur": {}, "tid": {}, "pid": "CPU functions", "args": {{}}}}, z
" node_id:z, thread_id:z "z
{"name": "z", "ph": "s", "ts": z	, "tid": z , "pid": "CPU functions", "id": z, "cat": "cpu_to_z", "args": {}}, r2      ])osr   openwrite
trace_nameformatrM   rN   
elapsed_us	is_remoterE   rH   r9   r5   seektellSEEK_SETtruncate)	r!   pathr   device_namefchrome_eventsnext_idrZ   ks	            r%   export_chrome_tracezEventList.export_chrome_trace   s    	$($4$4f$:J:J$_ ,	MG GGCL !!>>)' (.v,,113"}} 

)#++l3::,bQ(   !A GG%cnn%5 6!!$!5!5 6 7""%** .!!(	 ***5 7((	 qLG!'!!D 4y1}qvvx!|R[[1

GGCLY,	 ,	 ,	s   E1F%%F.c                 
    g dS )N)rq   self_cuda_time_totalself_xpu_time_totalself_privateuse1_time_total r+   s    r%   supported_export_stacks_metricsz)EventList.supported_export_stacks_metrics  s    
 	
r&   r   metricc           	         || j                         vr%t        dt        | j                               z         t        j                  dd      }t	        |d      5 }| D ]  }|j
                  st        |j
                        dkD  s)t        ||j                  dd      j                  dd      j                  d	d            }t        |      dkD  std
}t        |j
                        D ]  }||j                  |      z  }|dz  } |d d dz   t        t        |            z   }|j                  |dz           	 d d d        y # 1 sw Y   y xY w)Nzmetric should be one of: z ;	
____r   r   r   devicexpuprivateuse1 ;rP    
)r   
ValueErrorstr	maketransr   ri   r5   getattrreplaceintreversed	translater   )	r!   r   r   translate_tabler   rZ   metric_value	stack_strentrys	            r%   export_stackszEventList.export_stacks  s>   ==??+d::<=>  --&9$_ 	2 299SYY!!3#*vx8 1 9	$L <(1,$&	%-cii%8 -E%)III%,I- %.crNS$83s<?P;Q$Q		D 012	2 	2 	2s    E-EA
EA(EEc                 l   | j                   sJ t        t              }dt        t        df   fd}| D ]  }| ||||         j                  |         t        |j                         | j                  | j                  | j                        }|D ]   }|j                  d| |_        |rd|_        " |S )aH  Averages all function events over their keys.

        Args:
            group_by_input_shapes: group entries by
                (event name, input shapes) rather than just event name.
                This is useful to see which input shapes contribute to the runtime
                the most and may help with size-specific optimizations or
                choosing the best candidates for quantization (aka fitting a roof line)

            group_by_stack_n: group by top n stack trace entries

        Returns:
            An EventList containing FunctionEventAvg objects.
        return.c                 ^   t        | j                        t        | j                        t        | j                        t        | j                        t        | j
                        g}|r$|j                  t        | j                               |dkD  r|| j                  d | z  }t        |      S Nr   )
r   rF   rH   rR   	is_legacyis_user_annotationrY   input_shapesri   tuple)rJ   group_by_input_shapesgroup_by_stack_nrF   s       r%   get_keyz'EventList.key_averages.<locals>.get_key=  s    EIIEMM"E%%&EOO$E,,-C %

3u1123!#u{{#4$455:r&   r   r   r   Nr   )r   r   r   r
   r   r:   r   valuesr   r   r    ri   r   )r!   r   r   statsr   rZ   avg_lists          r%   key_averageszEventList.key_averages+  s     9DEU9V	uSRUX 	  	RC'#46FGHLLSQ	R LLN''//''	
  	&C		"3#34CI(#% 	& r&   c                 N    t               }| D ]  }||z  }d|_         d|_        |S )zVAverages all events.

        Returns:
            A FunctionEventAvg object.
        NTotal)r   rF   )r!   
total_statrZ   s      r%   total_averagezEventList.total_averageZ  s;     &'
 	"C#J!JN	" !
r&   )Nd   K   7   P   NF)Fr   )__name__
__module____qualname____doc__r   r,   r0   r)   r(   r*   propertyrq   r/   r   r   r   r   r   r   __classcell__)r$   s   @r%   r   r      s    1& "(D-L#4 @ @
   "#(
T7r
2# 2s 20-^r&   r   c                 N    d}d}| |k\  r	| |z  ddS | |k\  r	| |z  ddS | ddS )+Define how to format time in FunctionEvent.g    .Ag     @@z.3fsmsusr   )time_usUS_IN_SECONDUS_IN_MSs      r%   _format_timer   h  sU    "LH,L(-Q//(H$S),,c]"r&   c                 D    |dk(  r| dk(  s
J d|         y| dz  |z  ddS )r   r   zExpected time_us == 0 but got NaNg      Y@.2f%r   )r   total_time_uss     r%   _format_time_sharer   s  s?    !|G=gYGG|o-c2!44r&   c                     d}d|z  }d|z  }t        |       |k\  r| dz  |z  ddS t        |       |k\  r| dz  |z  ddS t        |       |k\  r| dz  |z  ddS t        |       dz   S )z&Return a formatted memory size string.i         ?r   z Gbz Mbz Kbz b)absr   )nbytesKBMBGBs       r%   _format_memoryr   {  s    	B	B	B
6{b3,#C(,,	V	3,#C(,,	V	3,#C(,,6{T!!r&   c                       t         fd      S )Nc                 .    t        t        |             S r.   )r   r   )r!   r7   s    r%   rK   z!_attr_formatter.<locals>.<lambda>  s    gdD.A!B r&   )r   r7   s   `r%   _attr_formatterr     s    BCCr&   c                       e Zd ZdZ ed      Z ed      Z ed      Z ed      Z ed      Z	 ed      Z
ed        Zed	        Ze ed
e      d               Zy)r   z{Helpers for FunctionEvent and FunctionEventAvg.

    The subclass should define `*_time_total` and `count` attributes.
    cpu_timedevice_timecpu_time_totaldevice_time_totalrq   self_device_time_totalc                 \    | j                   dk(  rdS d| j                  z  | j                   z  S Nr   g        r   )countr   r+   s    r%   r   zFormattedTimesMixin.cpu_time  s+    jjAosQ31D1D+Dtzz+QQr&   c                 \    | j                   dk(  rdS d| j                  z  | j                   z  S r   )r   r   r+   s    r%   r   zFormattedTimesMixin.device_time  s+    jjAosT31G1G+G$**+TTr&   z<`cuda_time` is deprecated, please use `device_time` instead.categoryc                     | j                   S r.   )r   r+   s    r%   	cuda_timezFormattedTimesMixin.cuda_time  s     r&   N)r   r   r   r   r   cpu_time_strdevice_time_strcpu_time_total_strdevice_time_total_strself_cpu_time_total_strself_device_time_total_strr   r   r   r   FutureWarningr   r   r&   r%   r   r     s    
 #:.L%m4O()9:+,?@-.CD!01I!JR R U U F 	 
 r&   r   c                       e Zd Zd Zd Zy)r   c                      || _         || _        y r.   )rN   rO   )r!   rN   rO   s      r%   r   zInterval.__init__  s    
r&   c                 4    | j                   | j                  z
  S )z4
        Returns the length of the interval
        )rO   rN   r+   s    r%   r   zInterval.elapsed_us  s     xx$**$$r&   N)r   r   r   r   r   r   r&   r%   r   r     s    %r&   r   r   )r7   r   durationc                   Z   e Zd ZdZdddddddddddej
                  ddddddddfdZd Zd Zd	 Z	e
d
        Ze
d        Ze
 ede      d               Ze
d        Ze
d        Ze
d        Ze
 ede      d               Ze
d        Ze
 ede      d               Ze
d        Zd Zy)r   z.Profiling information about a single function.Nr   FrP   c                    || _         || _        || _        || _        t	        ||      | _        || _        || _        g | _        d| _	        g | _
        d | _        || _        || _        || _        || _        |	| _        |
| _        || _        || _        || _        || _        || _        || _        || _        ||n|| _        || _        || _        || _        d| _        d| _        d| _        y )Nr2   rP   ) idrH   r7   r   r   rM   rE   rk   r9   r   r8   r6   r   concrete_inputskwinputsri   rg   r   cpu_memory_usagedevice_memory_usagerQ   r   rj   rR   device_indexdevice_resource_idr   flopsr   self_cpu_percenttotal_cpu_percenttotal_device_percent)r!   r	  r7   rE   start_usend_usrk   r   ri   rg   r   r  r  rQ   r   rj   rH   rR   r  r  r   r  r   r
  r  r   s                             r%   r   zFunctionEvent.__init__  s    8 #	)$,Xv$>!)3%'
1337-9*9(0 

)3%5(; &( +'2!-(0F6H 	  )$)
2D "!#$&!r&   c                     | j                   t        j                  k(  sJ | j                  j	                  t        |||             y r.   )rR   r   rS   r9   rY   r   )r!   r7   r   r  s       r%   append_kernelzFunctionEvent.append_kernel  s5    :>>111F4:;r&   c                     | j                   t        j                  k(  sJ t        |t              sJ |j                   t        j                  k(  sJ | j
                  j                  |       y)zAppend a CPU child of type FunctionEvent.

        One is supposed to append only direct children to the event to have
        correct self cpu time being reported.
        N)rR   r   rS   
isinstancer   r8   rY   )r!   childs     r%   rW   zFunctionEvent.append_cpu_child   sX     :>>111%///  JNN222  'r&   c                     | j                   t        j                  k(  sJ t        |t              sJ |j                   t        j                  k(  sJ || _        y)a$  Set the immediate CPU parent of type FunctionEvent.

        One profiling FunctionEvent should have only one CPU parent such that
        the child's range interval is completely inside the parent's. We use
        this connection to determine the event is from top-level op or not.
        N)rR   r   rS   r  r   r6   )r!   rc   s     r%   rX   zFunctionEvent.set_cpu_parent  sK     :>>111&-000!!Z^^333 r&   c                     | j                   s| j                  t        j                  k7  ry| j                  t        d | j                  D              z
  S )Nr   c              3   4   K   | ]  }|j                     y wr.   )r  rs   r  s     r%   rt   z6FunctionEvent.self_cpu_memory_usage.<locals>.<genexpr>  s      +
',E""+
ru   )rQ   rR   r   rS   r  rv   r8   r+   s    r%   self_cpu_memory_usagez#FunctionEvent.self_cpu_memory_usage  sJ    ==D,,
>$$s +
040A0A+
 (
 
 	
r&   c                     | j                   s| j                  t        j                  k7  ry| j                  t        d | j                  D              z
  S )Nr   c              3   4   K   | ]  }|j                     y wr.   )r  r  s     r%   rt   z9FunctionEvent.self_device_memory_usage.<locals>.<genexpr>%  s      .
*/E%%.
ru   )rQ   rR   r   rS   r  rv   r8   r+   s    r%   self_device_memory_usagez&FunctionEvent.self_device_memory_usage!  sJ    ==D,,
>''# .
373D3D.
 +
 
 	
r&   zO`self_cuda_memory_usage` is deprecated. Use `self_device_memory_usage` instead.r   c                     | j                   S r.   r"  r+   s    r%   self_cuda_memory_usagez$FunctionEvent.self_cuda_memory_usage)  s     ,,,r&   c                 r    | j                   t        j                  k(  r| j                  j	                         S yr   )rR   r   rS   rM   r   r+   s    r%   r   zFunctionEvent.cpu_time_total1  s*    z~~-??--//r&   c                     | j                   s| j                  t        j                  k7  ry| j                  t        d | j                  D              z
  S )Nr   c              3   4   K   | ]  }|j                     y wr.   )r   r  s     r%   rt   z4FunctionEvent.self_cpu_time_total.<locals>.<genexpr><  s      )
%*E  )
ru   )rQ   rR   r   rS   r   rv   r8   r+   s    r%   rq   z!FunctionEvent.self_cpu_time_total8  sJ    ==D,,
>""S )
.2.?.?)
 &
 
 	
r&   c                    | j                   s| j                  sy| j                  t        j                  k(  ra| j
                  s9t        d | j                  D              t        d | j                  D              z   S t        d | j                  D              S | j                  t        j                  t        j                  t        j                  fv sJ | j                  j                         S )Nr   c              3   4   K   | ]  }|j                     y wr.   r  rs   kinfos     r%   rt   z2FunctionEvent.device_time_total.<locals>.<genexpr>G       De5>>Dru   c              3   4   K   | ]  }|j                     y wr.   r   )rs   r@   s     r%   rt   z2FunctionEvent.device_time_total.<locals>.<genexpr>G  s      K-/B((Kru   c              3   4   K   | ]  }|j                     y wr.   r+  r,  s     r%   rt   z2FunctionEvent.device_time_total.<locals>.<genexpr>L  r.  ru   )rQ   r   rR   r   rS   r   rv   r9   r8   CUDAPrivateUse1MTIArM   r   r+   s    r%   r   zFunctionEvent.device_time_total@  s    ==z~~->>Dt||DDs K373D3DK H  
 Dt||DDD##&&(   
 ??--//r&   zA`cuda_time_total` is deprecated. Use `device_time_total` instead.c                     | j                   S r.   r0  r+   s    r%   cuda_time_totalzFunctionEvent.cuda_time_totalU  s     %%%r&   c                 R   | j                   s| j                  sy| j                  t        j                  k(  r)| j
                  t        d | j                  D              z
  S | j                  t        j                  t        j                  t        j                  fv sJ | j
                  S )Nr   c              3   4   K   | ]  }|j                     y wr.   r0  r  s     r%   rt   z7FunctionEvent.self_device_time_total.<locals>.<genexpr>b  s      0,1''0ru   )rQ   r   rR   r   rS   r   rv   r8   r2  r3  r4  r+   s    r%   r   z$FunctionEvent.self_device_time_total]  s    ==z~~-))C 0595F5F0 -   ##&&(   
 )))r&   zK`self_cuda_time_total` is deprecated. Use `self_device_time_total` instead.c                     | j                   S r.   r   r+   s    r%   r   z"FunctionEvent.self_cuda_time_totalm  s     ***r&   c                     | j                   S r.   r   r+   s    r%   rF   zFunctionEvent.keyu  s    yyr&   c           	         | j                   }| j                  }| j                  }dj                  g d| j                   d| j
                   d| j                   d| j                   d| j                   d| j                  j                   d| j                  j                   d	t        | j                  D cg c]  }|j                   c}       d
| d| d| j
                   d| j                   dt        | j                         d| j                    d
| d| d| j"                   d| j$                   d| j&                   d| j(                   d      S c c}w )Nr   z<FunctionEvent id=z name=z device_type=z	 node_id=
 cpu_time=z
 start_us=z end_us=z cpu_children=r   _time=z thread= input_shapes= cpu_memory_usage=_memory_usage=z
 is_async=z is_remote=z seq_nr=z is_legacy=>)r   r   r  joinr	  r7   rR   rH   r   rM   rN   rO   r   r8   rE   r   r  rQ   r   rj   r   )r!   r   r   r  r  s        r%   __repr__zFunctionEvent.__repr__y  s   oo**"66y y  y	 y y		{ y- yHXHXGY yYb ycgcocobp y q y))*y*4y59__5J5J4KyKSyTXTcTcTgTgShyiyt7H7H Ie IJKyKLyMXMyY_y`k_lymy II;y 'y (,{{my 4By CFdFWFWBXAYyZ y !% 5 56	y 78	y 9D}	y ES	y TgRg	yh	y
 y
 '2y
 37..1Ay
 BJy
 KOJZJZI[y
 \gy
 hlgugufvy
 wxy	
 !Js   E:)r   r   r   r   r   rS   r   r  rW   rX   r   r  r"  r   r  r%  r   rq   r   r6  r   r   rF   rD  r   r&   r%   r   r     sb   8 NN 5<'|<	(
! 
 
 
 
 Y-	 
-   
 
 0 0( K&	 
& * * U+	 
+  
r&   r   c                   *    e Zd ZdZddZd Zd Zd Zy)r   z:Used to average stats over multiple FunctionEvent objects.Nc                 T   d | _         d| _        d| _        d| _        d| _        d | _        d| _        d| _        d| _        d| _	        d | _
        d | _        d | _        d| _        d| _        d| _        d| _        d | _        d | _        t&        j(                  | _        d| _        d| _        y )Nr   F)rF   r   rH   rQ   r   r   r   r   rq   r   r   ri   rg   r  r  r  r"  r8   r6   r   rS   rR   r   r  r+   s    r%   r   zFunctionEventAvg.__init__  s    "&
#$)-#$&'() +,#7;%)
$(
%&() *+"-.%;?37'1~~$
r&   c                     | j                   |j                   | _         |j                  | _        |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        |j                  | _        |j                  | _	        |j                  | _
        |j                  | _        |j                  | _        t        |t        t        f      sJ |j                   | j                   k(  sJ | xj                   |j                   z  c_        | xj"                  |j"                  z  c_        | xj$                  |j$                  z  c_        | xj&                  |j&                  z  c_        | xj(                  |j(                  z  c_        | xj*                  |j*                  z  c_        | xj,                  |j,                  z  c_        | xj.                  |j.                  z  c_        | xj0                  |j0                  z  c_        | j2                  |j2                  | _        | S |j2                  | xj2                  |j2                  z  c_        | S r.   )rF   rH   rQ   r   r6   r8   r   ri   rg   rR   r   r   r   r  r   r   r   r   rq   r   r  r  r  r"  r   r  r!   others     r%   r:   zFunctionEventAvg.add  s   88 yyDH ==DL!NNDM"__DN#..DO % 2 2D % 2 2DDJDJ$00D"__DN#..DO&+&>&>D#%-1A!BCCCyyDHH$$$u333%"9"99  E$=$== ##u'C'CC#!7!77  E$=$== ""e&A&AA"%%)G)GG%

ekk!
::DJ  [[$JJ%++%Jr&   c                 $    | j                  |      S r.   )r:   rH  s     r%   __iadd__zFunctionEventAvg.__iadd__  s    xxr&   c                 <   | j                   sdn| j                   }| j                  }| j                  }| j                  }d| j                   d| j
                   d| j                   d| d| d| d| dt        | j                         d	| j                   d| d
| dS )Nr   z<FunctionEventAvg key=z self_cpu_time=r=  z  self_r>  r   r?  r@  rA  rB  )
r   r  r   r  rF   r   r   r   r   r  )r!   r   self_device_timer   device_memorys        r%   rD  zFunctionEventAvg.__repr__  s    $(OOf::**00$TXXJod>Z>Z=[[efjfwfwex y M(8'9;-vk]Zhilmqm~m~i  iA A  $ 5 56a}NS`Raabd	
r&   )r   N)r   r   r   r   r   r:   rK  rD  r   r&   r%   r   r     s    D0"H	
r&   r   c                       e Zd Zd Zy)r   c                 p    t        |      dkD  rt        j                  j                  |      n|| |<   | |   S rf   )r5   torch_C	_demangle)r!   rF   s     r%   __missing__zStringTable.__missing__  s2     033x!|EHH&&s+S	Cyr&   N)r   r   r   rT  r   r&   r%   r   r     s    r&   r   c                       e Zd ZdZd Zd Zy)r   z=Acceleration structure for accessing mem_records in interval.c                     || _         g | _        g | _        t        |      dkD  rPt	        t        |      D cg c]  \  }}|d   j                         |f c}}      }t        | \  | _        | _        y y c c}}w r   )_mem_records_start_nses_indicesr5   rT   r;   start_nszip)r!   mem_recordsirtmps        r%   r   zMemRecordsAcc.__init__  sn    '&(#%{a9[;QR41a1Q4==?A.RSC.13i+Ddm  Rs   A5
c              #      K   t        j                  | j                  |dz        }t        j                  | j                  |dz        }t	        ||      D ]   }| j
                  | j                  |       " yw)z
        Return all records in the given interval
        To maintain backward compatibility, convert us to ns in function
        i  N)bisectbisect_leftrX  bisect_rightr4   rW  rY  )r!   r  r  	start_idxend_idxr]  s         r%   in_intervalzMemRecordsAcc.in_interval  sp     
 &&t'7'7DI	%%d&6&6Fy'* 	6A##DMM!$455	6s   A7A9N)r   r   r   r   r   rf  r   r&   r%   r   r     s    G86r&   r   c                 4     g d}t         fd|D              S )N))autograd/__init___make_grads)rh  backward)ztorch/tensorrj  )_internal/common_utilsprof_callable)rk  prof_func_call)rk  prof_meth_callc              3   @   K   | ]  }|d    v xr |d   v    yw)r   r2   Nr   )rs   r   r   s     r%   rt   z&_filter_stack_entry.<locals>.<genexpr>  s)     OAaDEM3adem4Os   )all)r   filtered_entriess   ` r%   _filter_stack_entryrr    s     O>NOOOr&   z[memory]z[OutOfMemory]c                 .    t         t        ddddddg}| |v S )Nz profiler::_record_function_enterz$profiler::_record_function_enter_newzprofiler::_record_function_exitzaten::is_leafzaten::output_nrzaten::_version)MEMORY_EVENT_NAMEOUT_OF_MEMORY_EVENT_NAME)r7   filtered_out_namess     r%   _filter_namerw    s2     	 *.)	 %%%r&   c                 N    t               }||    } |r| j                  d      rd} | S )NzProfilerStep#zProfilerStep*)r   
startswith)r7   with_wildcardstring_tables      r%   _rewrite_namer|    s,    =LD???+"DKr&   c
                   ,-./0 t        |       dk(  ryt        d | D              }
t        d | D              }| d   j                  }|s|
rt        d      t        d | D              }t	        t        | fdd	
      |||      } t        d | D              dz   }|t        ||      }t        d | D              dz   }|t        ||      }d}|}d}g }| D ]C  }|j                  t        |j                        dkD  s)|j                  |j                         E t        |      dkD  }|r#t        d |D              dz   }|t        ||      }g d}||j                         nd}|
r"|j                  d| d| d| d| dg       |r1|j                  ddg       |r|r|j                  | dd| dg       |j                  d       t        d | D              }|r|j                  d       d,dg0dg-, g.d1,-.0fd	}d } ||       |d d D ]
  } ||        |r|j                  d!        ||       |r|j                  d"        ||d#$       |rug }| D ]-  }|j                  dkD  s|j                  |j                         / t        |      dk7  r1 |t        |            \  }}|j                  d%|         ||       nd&}0d   }-d   } .d   }!d}g //fd'}"d}#d}$| D ]  }|#|j                  z  }#|j                  t        j                   k(  r|j"                  r|$|j$                  z  }$K|j                  t        j&                  t        j(                  t        j*                  fv s|j,                  r|$|j$                  z  }$ | |"d(|!z          |"|       |	r |"d(|!z          |"d)        |"|         |" |j.                  |         |"|        d* }%d}&| D ]  }|&|k(  r n|	r|j0                  |&d z  }&|j2                  }'|t        |'      |d+z
  k\  r|'d|d+z
   d,z   }'t5        |j                  |#      |_        |j8                  st5        |j:                  |#      nd|_        |'|j6                  |j>                  |j<                  |j@                  |jB                  g}(|
rXt5        |j$                  |$      |_"        |(j                  |jF                  |jD                  |jH                  |jJ                  g       |rv|(j                  tM        |jN                        tM        |jP                        g       |r;|r9|(j                  tM        |jR                        tM        |jT                        g       |(j                  |jV                         |r|(j                  |jX                         |r'|(j                  t[        |j\                        d|        |rA|j                  dk  r|(j                  d-       n |(j                  |j                  z  d.       |rAd})t        |j                        dkD  r |%|j                  d   |      })|(j                  |)        |" |j.                  |(        |sdgt        |      d z
  z  }*|j                  d d D ]"  }+ |" |j.                  |* |%|+|      gz           $ |*j                  d        |" |j.                  |*          |"|         |"d/t_        |#              |
r) |"d||j                         nd d0t_        |$              dja                  /      S )2zUPrint a summary of events (which can be a list of FunctionEvent or FunctionEventAvg).r   r   c              3   :   K   | ]  }|j                   d kD    ywr   Nr:  rr   s     r%   rt   z_build_table.<locals>.<genexpr>0  s     Ou%66:O   c              3   :   K   | ]  }|j                   d kD    ywr  r$  rr   s     r%   rt   z_build_table.<locals>.<genexpr>1  s     P77!;Pr  z9use_device is None, but there is device performance data.c              3   l   K   | ],  }|j                   d uxr t        |j                         dkD   . y wr   )r   r5   rr   s     r%   rt   z_build_table.<locals>.<genexpr>9  s;       
		4	'	GC0B0B,Ca,G	Gs   24Nc                 |    t        | j                  dd      j                  dd      j                  dd            S )Nr   r   r   r   )r   r   )rZ   rx   s    r%   rK   z_build_table.<locals>.<lambda>B  s5    OOFH5WUH-W]H5	! r&   T)rF   reverser   c              3   F   K   | ]  }t        |j                          y wr.   )r5   rF   rs   rZ   s     r%   rt   z_build_table.<locals>.<genexpr>O  s     ;SCL;s   !   c              3   X   K   | ]"  }t        t        |j                               $ y wr.   )r5   r   r   r  s     r%   rt   z_build_table.<locals>.<genexpr>S  s      KSc#c&6&6"78Ks   (*   c              3   @   K   | ]  }t        d  |D                yw)c              3   2   K   | ]  }t        |        y wr.   r5   )rs   r   s     r%   rt   z)_build_table.<locals>.<genexpr>.<genexpr>b  s     25CJ2s   N)max)rs   ri   s     r%   rt   z_build_table.<locals>.<genexpr>b  s     Gu2E22Gs   )Namez
Self CPU %zSelf CPUzCPU total %z	CPU totalzCPU time avgNonezSelf z %z totalz	 time avgzCPU MemzSelf CPU Memz Memz
# of Callsc              3   :   K   | ]  }|j                   d k7    yw)rP   N)rH   r  s     r%   rt   z_build_table.<locals>.<genexpr>  s     =s*=r  zNode IDr   c                     dxx   d|z   t        |       z   dz   dz  z   z  cc<   dxx   d| z  dz  z   z  cc<   dxx   | z   z  cc<   y )Nr   z{: }r   -)r   )paddingtext_dirSPACING_SIZEheader_sep_lstline_length_lstrow_format_lsts     r%   
add_columnz _build_table.<locals>.add_column  sh    qHs7|+c1S<5GH	
 	qS7]cL.@AAg44r&   c                 $   g d}| dkD  sJ t        dt        t        j                  |       dz  t	        t        |      dz
                    }|dk\  r|t        |      k  sJ t        dt        j                  |      dz        |t        |         fS )N)FLOPsKFLOPsMFLOPsGFLOPsTFLOPsPFLOPsr      r2   
   g      )	r  minmathlog10floatr5   powfloorr   )r  flop_headers	log_flopss      r%   auto_scale_flopsz&_build_table.<locals>.auto_scale_flops  s    
 qyy3tzz%014eC<MPQ<Q6RST	A~)c,.?"???BI.57c)n9UVVr&   r2   zInput ShapeszSource Location<)r  zTotal Fc                 J    j                  |        j                  d       y )Nr   )rY   )r   results    r%   rY   z_build_table.<locals>.append  s    adr&   =z1This report only display top-level ops statisticsc                 t    t        |       |kD  r)t        |       |z
  }| |d  } t        |       dkD  rd| dd  z   } | S )Nr  ...r  )r   src_column_widthoffsets      r%   	trim_pathz_build_table.<locals>.trim_path  sI    t9''Y!11F=D4y1}tABx'r&   r  r  z--z8.3fzSelf CPU time total: z time total: )rB  )1r5   anyr   RuntimeErrorr   rT   r  r  ri   rY   upperr=   r  rq   rR   r   rS   r   r   r2  r3  r4  r   r   r6   rF   r   r  rQ   r   r  r   r   r   r  r  r   r   r   r  r  r  r"  r   rH   r   r   r   rC  )1r\   rx   r}   ry   rz   r{   r|   r   r   r~   has_device_timehas_device_memr   has_input_shapesname_column_widthshapes_column_widthDEFAULT_COLUMN_WIDTHflops_column_widthr  stacksrZ   	has_stackheadersr   append_node_idr  r  _	raw_flopsflops_scaleflops_header
row_format
header_sepline_lengthrY   sum_self_cpu_time_totalsum_self_device_time_totalr  event_limitr7   
row_values	src_fieldempty_headersr   r  r  r  r  r  s1    `                                          @@@@@r%   r   r      s    6{aOOOOPPPN%%J /VWW  
  	 ")!
  ;F;;a?( 13HIKFKKaO*!"57NO-F %99 S^a%7MM#))$% FaIGGG!K 	  +"#35IJG )3(>*""$FK}%}B'-v&-y)		
 	
 .NN"m4(K=- NN< =f==Ny! LTNTN$}oO5 5W  !QR[ )'() ~&&'()#c2	 	,Cyy1}  +	, y>Q*:3y>*J'[,NNVL>23)*J"J"J!!$KJ F  !" E3#:#::??jnn,&#*D*DD&OO&& ** '#*D*DD&E$ s[ !vs[ !BC
:
:g&'
: K X6)# S^^%?1Kww ,T>SVW>W1W50146>D1##%< 

 << s113JK 	   ''!!""	

 '9**,F(C$ 22,,--''  #3#7#78"3#<#<=	 n!! 's'>'>?&s'C'CD	 	II	
 ckk*c#"2"234H5HIJyyA~!!$'!!SYY%<T$BDI399~!%ciil4DE	i( z  *-.DCL1$45M12 %J%%'9U<L+M*NN   $$:$$m45qX6t :
"<0G#H"IJK**@J$$&fM N'(BCDF	
 776?r&   )F)	NNr   r   r   r   FFF)'ra  rU   r  collectionsr   r   operatorr   typingr   r   r   r	   r
   typing_extensionsr   rQ  torch.autogradr   __all__listr   r   r   r   r   r   r   r   r   r   r   r   rr  rt  ru  rw  r|  r   r   r&   r%   <module>r     s       /  3 3 (  %	K K\
5"D   <	% 	% 
H<	=I
' I
XK
* K
\+ 6 6,	P  * && vr&   