
    sg~              
       N   d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
mZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlZd dlmZmZ d dlmZ d d	lmZm Z  d d
l!m"Z"m#Z# d dl$m%Z%m&Z& d dl'm(Z(m)Z) erd dl*m+Z+  e,ejZ                  j]                  dd             d k(  rdndZ/dZ0dgZ1 G d de2e      Z3 G d de2e      Z4 G d de3      Z5 G d de4      Z6 G d d      Z7 G d d      Z8de2d e,fd!Z9d"e,d#e,d$e,d ee:e,f   fd%Z;d&eejx                  ee2e,f   f   de2d dfd'Z=d&eejx                  ee2e,f   f   de2d dfd(Z>d)ee4eeejx                  ee2e,f   f      f   de2d dfd*Z?d)ee4eeejx                  ee2e,f   f      f   de2d dfd+Z@ G d, d-e      ZA G d. de#      ZBy)/    N)deepcopy)autoEnum)partialwraps)
AnyCallableDictListOptionalSetTupleTypeTYPE_CHECKINGUnion)Self)nnoptim)
ModTracker)!register_optimizer_step_post_hook register_optimizer_step_pre_hook)is_traceable_wrapper_subclassTorchDispatchMode)tree_flattentree_map_only)WeakIdKeyDictionaryweakref)RemovableHandlePYTORCH_NO_CUDA_MEMORY_CACHINGi      Total
MemTrackerc                       e Zd ZdZy)_RefTypeziBase Class for defining memory reference types, categorizing tensors based on their usage within a model.N__name__
__module____qualname____doc__     W/var/www/html/venv/lib/python3.12/site-packages/torch/distributed/_tools/mem_tracker.pyr$   r$   2   s    sr+   r$   c                       e Zd ZdZy)_Statez;Base Class for defining module state to capture snapshots .Nr%   r*   r+   r,   r.   r.   6   s    Er+   r.   c                   ,    e Zd ZdZdZdZdZdZdZdZ	dZ
y	)
_MemRefTypea  
    An enum to define memory reference types, categorizing tensors based on their usage within a model.

        - PARAM: Tensors registered as nn.Parameter within modules.
        - BUFFER: Tensors registered as nn.Buffer within modules.
        - GRAD: Gradients associated with parameters.
        - ACT: Tensors produced during the forward pass and recomputation in activation checkpointing.
        - TMP: Temporary memory used during the backward pass, including gradients of activations.
        - OPT: Tensors holding optimizer states.
        - OTH: Tensors registered via `track_external` that do not fit the above categories.
    	ParameterBufferGradient
ActivationTempOptstateOtherN)r&   r'   r(   r)   PARAMBUFFERGRADACTTEMPOPTOTHr*   r+   r,   r0   r0   :   s,    
 EFD
CD
C
Cr+   r0   c                   0    e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zy
)	_ModStatea  
    An enum to define the state of a module.

        - PRE_FW: The module is about to run the forward pass.
        - POST_FW: The module has finished running the forward pass.
        - PEAK_FW: The module has reached the peak memory usage during the forward pass.
        - PRE_BW: The module is about to run the backward pass.
        - PRE_FW_AC: The module is about to run the forward pass with activation checkpointing.
        - POST_FW_AC: The module has finished running the forward pass with activation checkpointing.
        - POST_BW: The module has finished running the backward pass.
        - PEAK_BW: The module has reached the peak memory usage during the backward pass.
    zPre-ForwardzPost-ForwardzPeak-ForwardzPre-BackwardzPre-Forward-ACzPost-Forward-ACzPost-BackwardzPeak-BackwardN)r&   r'   r(   r)   PRE_FWPOST_FWPEAK_FWPRE_BW	PRE_FW_AC
POST_FW_ACPOST_BWPEAK_BWr*   r+   r,   r@   r@   P   s1     FGGF I"JGGr+   r@   c                       e Zd ZdZdefdZy)_ModMemStatsa  
    A class to store the memory statistics of a module.

    Args:
        mod_fqn (str): The fully qualified name of the module.
    Attributes:
        mod_fqn (str): The fully qualified name of the module.
        parameter_mem (int): The memory usage of the parameters of the module.
        buffer_mem (int): The memory usage of the buffers of the module.
        input_mem (int): The memory usage of the inputs to the module.
        output_mem (int): The memory usage of the outputs from the module.
        snapshots (Dict[_ModState, Dict[torch.device, Dict[str, int]]]): A dictionary of memory snapshots
        of the module at different states defined by ``_ModState``.
    Note:
        The memory snapshot is stored as a dictionary - Dict[torch.device, Dict[str, int]], where each key is a device,
         and each value is another dictionary with keys as memory reference types defined by `_MemRefType` and
         values as the memory consumed in bytes.
    mod_fqnc                 >    || _         |  |  |  |  i | _        i | _        y N)rK   
local_peak	snapshots)selfrK   s     r,   __init__z_ModMemStats.__init__|   s#    35TVr+   N)r&   r'   r(   r)   strrQ   r*   r+   r,   rJ   rJ   h   s    &W Wr+   rJ   c                   P   e Zd ZdZdededej                  deddf
dZdefd	Z	d
ej                  defdZedej                  deej                     fd       Ze	 dd
ej                  dej                  dedeeeej*                  gef      deeej*                  f   f
d       Zy)_WeakRefInfozN
    Manages memory statistics and device attributes for tensor storages.
    sizeelement_sizedevicereftypereturnNc                 f    || _         || _        || _        || _        | j	                         | _        y)a  
        Initializes the ``_WeakRefInfo`` object with tensor storage properties.

        Args:
            size (int): The number of elements in the tensor storage.
            element_size (int): The size of each element in the tensor storage.
            device (torch.device): The device on which the tensor is allocated.
            reftype (_RefType): The reference type of the tensor.
        N)rU   rV   rX   rW   _calculate_mem_consumedmem_consumed)rP   rU   rV   rW   rX   s        r,   rQ   z_WeakRefInfo.__init__   s2     	( 88:r+   c                     | j                   | j                  z  }| j                  j                  dk(  r#t	        j
                  |t        z        t        z  S |S )z
        Calculates the memory consumed by the tensor storage, considering device-specific allocation rules.

        Returns:
            int: The memory consumed in bytes.
        cuda)rU   rV   rW   typemathceil_PYTORCH_MIN_ALLOCATE)rP   mems     r,   r[   z$_WeakRefInfo._calculate_mem_consumed   sJ     ii$+++;;v%99c%::;>SSS
r+   stc                     |j                         | j                   k7  r*|j                         | _         | j                         | _        | j                  S )a  
        Updates and returns the memory consumed if the storage size has changed.

        Args:
            st (torch.UntypedStorage): The tensor storage to check for size updates.

        Returns:
            int: The updated memory consumed in bytes.
        )rU   r[   r\   )rP   rd   s     r,   update_mem_consumedz _WeakRefInfo.update_mem_consumed   s@     779		!	DI $ < < >D   r+   tc           	         | g}t               }t        |      dkD  r|j                         }t        |      r=|j	                         \  }}|j                  |D cg c]  }t        ||       c}       nTt        |d      s)t        j                  dt        |       t        d       n|j                  |j                                t        |      dkD  r|S c c}w )z
        Recursively extracts untyped storages from a tensor or its subclasses.

        Args:
            t (torch.Tensor): The tensor to extract storages from.

        Returns:
            Set[torch.UntypedStorage]: A set of untyped storages.
        r   untyped_storagezEExpected a tensor or a traceable wrapper-subclass of tensor, but got    )category
stacklevel)setlenpopr   __tensor_flatten__extendgetattrhasattrwarningswarnr_   UserWarningaddri   )rg   unflattened_tensorsflattened_tensor_storagesobjattrs_attrs          r,   get_untyped_storagesz!_WeakRefInfo.get_untyped_storages   s      !c$'E!%&*%))+C,S1113q#**5+Q4GC,>+QRs$56MM_`deh`i_jk!,#$ .11#2E2E2GH %&* )( ,Rs   Ccallbackc                      | |j                         |j                         ||      }t        j                  ||rt	        ||      nd      }||fS )a  
        Creates a new ``_WeakRefInfo`` instance and a weak reference to a ``torch.UntypedStorage`` object,
        optionally attaching a callback to the weak reference.

        Args:
            st (torch.UntypedStorage): The storage object for which to create the weak reference info.
            device (torch.device): The device associated with the storage object.
            reftype (_RefType): The type of reference, used to categorize the storage.
            callback (Optional[Callable[[Self, weakref.ref]]]): A callback function that is called when
                the storage object is about to be finalized (garbage collected). The callback function
                should accept two arguments: the ``_WeakRefInfo`` instance and the weak reference to the storage.
        Returns:
            Tuple[Self, weakref.ref]: A tuple containing the newly created ``_WeakRefInfo`` instance and the
            weak reference to the storage object. The weak reference may have an attached callback if provided.
        N)rU   rV   r   refr   )clsrd   rW   rX   r   winfow_sts          r,   create_winfoz_WeakRefInfo.create_winfo   sF    0 BGGIr0&'B{{28wx7Nd{r+   rM   )r&   r'   r(   r)   inttorchrW   r$   rQ   r[   UntypedStoragerf   staticmethodTensorr   r~   classmethodr   r	   r   r   r   r   r   r   r*   r+   r,   rT   rT      s	   ;;'*;49LL;KS;	;$
 
!e&:&: !s ! ) )U5I5I1J ) )8  BF    	
 8T7;;$7$<=> 
tW[[ 	! r+   rT   unitsrY   c           	      ~    ddddd}| |v r||    S t        d|  ddj                  |j                                      )	Nr    i   i   i   @)BKiBMiBGiBzUnsupported unit: z. Supported units are: z, )
ValueErrorjoinkeys)r   	unit_dicts     r,   _get_mem_divisorr      sR    eEBI	 '>tyyIY?Z>[\
 	
r+   valuedivisor	precisionc                 .    |dk(  r| S t        | |z  |      S )Nr    )round)r   r   r   s      r,   _rounding_fnr      s    qL5GeEGOY&GGr+   snapshotc                    t        |       dk(  rt        d       y t              | j                         D ]G  \  }}t	        |t
           d      dk  rt        d| gfd|j                         D        ddi I y )Nr   No memory tracked.rj   zDevice: c           	   3   P   K   | ]  \  }}d | dt        |d       d   yw)	z: rj    Nr   ).0kvr   r   s      r,   	<genexpr>z"_print_snapshot.<locals>.<genexpr>
  s:      Aq QCr,q'156aw?s   #&sep
)rn   printr   itemsr   
_TOTAL_KEY)r   r   devdev_snapr   s    `  @r,   _print_snapshotr      s    
8}"#u%G!) 

X,gq9Q>se	
$NN,	
 	


r+   c                 X   t        |       dk(  rt        d       y 	 ddlm} t	              g }t        t        t        | j                                     j                               }dg|D cg c]  }|  c}z   }| j                         D ]c  \  }}	t        |	t           d      dk  rt        |      g}
|
j                  fd|	j                         D               |j                  |
       e t         |||d	             y # t        $ r}t        d      |d }~ww xY wc c}w )
Nr   r   tabulate3Please install tabulate to use the tabulate option.Devicerj   c              3   B   K   | ]  }t        |d        d   yw)rj   r   Nr   )r   r   r   r   s     r,   r   z*_print_snapshot_tabular.<locals>.<genexpr>'  s&     Wl1gq12!E7;Ws   rstheaderstablefmt)rn   r   r   ImportErrorr   listnextitervaluesr   r   r   r   rR   rq   append)r   r   r   err
table_datakey_listkeyr   r   r   rowr   s    `         @r,   _print_snapshot_tabularr     s    8}"#%
 u%GJDhoo/016689Hj9seH99G!) X,gq9Q>3xj

WX__EVWW# 
(:w
?@  A
	 :s   D
 0
D'
	D$DD$rO   c                     | j                         D ]B  \  }}t        |        t        |      D ]#  \  }}t        d|dz    d       t        ||       % D t                y )Nz# r    :)r   r   	enumerater   )rO   r   statesnapshot_listir   s         r,   _print_state_snapshotsr   ,  sf     !* 1 -}$]3 	-KAxBq1ugQ- He,	--
 
Gr+   c           	         	 ddl m } g }d }t        |      }| j                         D ]  \  }}t	        |      D ]  \  }	}
| d|	dz    }|
j                         D ]s  \  }}t        |t           |d      dk  r||k7  r|ndt        |      d}|}|j                         D ]  \  }}t        ||d       d	| || <    |j                  |       u   t         ||d
d             y # t        $ r}t        d      |d }~ww xY w)Nr   r   r   z # r    rj    )zState & Callr   r   r   r   r   )
r   r   r   r   r   r   r   rR   r   r   )rO   r   r   r   r   last_state_callr   r   r   r   r   
state_callr   r   r   r   r   s                    r,   _print_state_snapshots_tabularr   7  s?   % JOu%G ) 1 '}$]3 	'KAx!7#a!eW-J!)!1 'X 4gqAQF '1O&C
!#h	 #-$NN, KDAq%1!Wa%@$A5'"JC1#KK!!#&'	''  
(:v
>?1  A
	s   C' '	D0C<<Dc                   D    e Zd Z e       Z e       Z e       Z e       Zy)_UpdateTypeN)r&   r'   r(   r   ADDDELREFSIZEr*   r+   r,   r   r   W  s      &C
&C
&C6Dr+   r   c                       e Zd ZdZd1dZ	 	 d2dededee   dee	   ddf
d	Z
	 d3d
ej                  de	dedee   fdZdedej"                  ddfdZd1dZd1dZdeddfdZde	d
ej                  ddfdZ	 d4dedeej4                  eeef   f   fdZ	 d5dej:                  dedeeef   fdZde defdZ!dej:                  de ddfdZ"dej:                  de de ddfdZ#dej:                  de ddfd Z$dej:                  de ddfd!Z%de	d"e&jN                  ddfd#Z(d1d$Z)d1d%Z*d&e+ej:                  e&jN                  ej                  f   ddfd'Z,	 d6ded(ed)eddfd*Z-	 d7d+ed(ed)eddfd,Z.d1d-Z/d8 fd.Z0de ddf fd/Z1d9d0Z2 xZ3S ):r"   a
  
    A TorchDispatchMode to track, categorize and attribute the tensor memory created or accessed within its context.

    It categorizes the tracked tensors as parameters, buffers, activations, gradients, temporary memory and optimizer states
    as defined by ``_MemRefType`` within its context. It captures memory `snapshots` for the modules, called within its context,
    at various states defined by ``_ModState``.

    Attributes:
        memory_tracking: A weakref key dictionary to store the memory statistics of each module. Each key
        is a reference to a module, and each value is a ``_ModMemStats`` object that stores the memory
        statistics of the module.

    Note:
        The MemTracker should be used as a context manager. The modules, optimizers, and any other tensors created within
        the context of MemTracker will be tracked by default. Any tensors or stateful objects such as modules, optimizers etc.
        that need to be tracked but are created outside the MemTracker should be registered using the `track_external` method.
        The `track_external` method should be called before the MemTracker is used. Any tensors created outside the ``MemTracker``
        and not supplied to the `track_external` method will not be tracked by the ``MemTracker``.

    Example usage:

        .. code-block:: python

            module = ...
            optimizer = ...
            inp = ...
            mem_tracker = MemTracker()
            mem_tracker.track_external(module, optimizer, inp)
            with mem_tracker as mt:
                loss = module(inp)
                print("After Forward:")
                mt.display_snapshot("current")
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            mt.display_snapshot("peak")
            mt.display_modulewise_snapshots(depth = 3, units = "MiB")

    Known Limitations:
        - The ``MemTracker`` does not track memory for tensors that bypass the ``TorchDispatchMode`` ex. under ``no_dispatch``.
        - Resizing tensor storages directly by using non-Tensor methods other than using ``torch.Untyped_Storage.resize_``
          is not tracked. File a Github issue if you have use-cases for this.
        - If the tensors are not traceable or wrappable subclasses of ``torch.Tensor``, then the tracker does not know how to
            track their storages. File a Github issue if you have use-cases for this.
        - During AC in the backward pass there might be misattribution between activation and temp memory, but the peak memory
          will be tracked accurately. This will be fixed in the next update by hooking intricately with ``torch.uitls.checkpoint``.
    rY   Nc                 2   t               | _        i | _        i | _        i | _        t               | _        d | _        t               | _        t               | _	        t        | _        d| _        d| _        d | _        t        j                   j"                  | _        y NF)r   memory_tracking_curr_mem_snap	_peak_mem_peak_mem_snap_param_to_grad_hook_handles_optimizer_hook_handles_WINFOr   _mod_trackerr0   
_ref_class_in_opt_in_ac_ac_modr   r   resize__orig_resizerP   s    r,   rQ   zMemTracker.__init__  s    24BD24BD+>+@(  	$ *+&L*5"!.2!0088r+   u_typer   old_mem_consumedold_reftypec                    d}| j                   j                  |j                  t        j	                  | j
                  d            }|j                  t        d       |t        j                  k(  r>||j                  xx   |j                  z  cc<   |t        xx   |j                  z  cc<   n|t        j                  k(  r?||j                  xx   |j                  z  cc<   |t        xx   |j                  z  cc<   d}n|t        j                  k(  r=|J ||xx   |j                  z  cc<   ||j                  xx   |j                  z  cc<   n_|t        j                  k(  r>|J |j                  |z
  }||j                  xx   |z  cc<   |t        xx   |z  cc<   d}nt        d|       |r<| j                   |j                     t           dk(  r| j                   |j                  = y y y )NFr   TzInvalid update type: )r   
setdefaultrW   dictfromkeysr   r   r   r   rX   r\   r   r   r   r   )rP   r   r   r   r   
maybe_zeror   changes           r,   _update_snapzMemTracker._update_snap  s    
&&11LL$--;
 	J*[__$U]]#u'9'99#Z E$6$66 {&U]]#u'9'99#Z E$6$66 J{&***[!U%7%77!U]]#u'9'99#{'''#///''*::FU]]#v-#Z F* J4VH=>>""5<<0<A''5 B r+   rg   rX   update_existingc                 F   t         j                  |      }t               }|D ]  }| j                  j	                  |d      \  }}|L|j
                  }	|	|k7  r)||_        | j                  t        j                  ||	       |j                  |       p|rt        d      t         j                  ||j                  || j                        \  }}
||
f| j                  |<   |j                  dkD  r | j                  t        j                  |       |j                  |        |S )NNN)r   zNo existing winfo foundr   )rT   r~   rm   r   getrX   r   r   r   rw   KeyErrorr   rW   _delete_callbackr\   r   )rP   rg   rX   r   stswinfosrd   r   r|   r   r   s              r,   _update_and_maybe_create_winfosz*MemTracker._update_and_maybe_create_winfos  s    //2 	"B{{r<8HE1 #mm')$+EM%%koou+%V

5! 899 +77'4+@+@t $)$-B%%)%%koou=

5!1	"2 r+   r   c                 d    |j                   dkD  r!| j                  t        j                  |       y y )Nr   )r\   r   r   r   )rP   r   r   s      r,   r   zMemTracker._delete_callback  s+     !koou5 "r+   c                      t         j                        dt        j                  dt        dd f fd       }|t        j                  _        y )Nrd   rU   rY   c                 (   j                  | |       j                  j                  | d      \  }}|^|j                  | j                         k7  r@|j                  }|j                  |        j                  t        j                  ||       y y y )Nr   r   )	r   r   r   rU   r\   rf   r   r   r   )rd   rU   r   r|   r   rP   s        r,   r   z)MemTracker._track_resize.<locals>.resize_  s    b$'{{r<8HE1 UZZ2779%<#(#5#5 ))"-!!$$e>N "  &= r+   )r   r   r   r   r   r   )rP   r   s   ` r,   _track_resizezMemTracker._track_resize  sM     
t  	!	,, 	C 	D 	 
"	 (/$r+   c                 B    | j                   t        j                  _        y rM   )r   r   r   r   r   s    r,   _restore_resizezMemTracker._restore_resize  s    '+'8'8$r+   
peak_statec                 ~   | j                   }| j                  j                         D ]  }|j                  | j                  j
                  v s&||j                  v s5|j                         D ]`  \  }}|j                  j                  |d      |t           k  s-|t           |j                  |<   t        |      |j                  |   d   |<   b  |j                         D ]Z  \  }}| j                  j                  |d      |t           k  s-|t           | j                  |<   t        |      | j                  |<   \ y )Nr   )r   r   r   rK   r   parentsrO   r   rN   r   r   r   r   r   )rP   r  	curr_snap	mod_statsr   r   s         r,   _update_peak_statszMemTracker._update_peak_stats  s)    ''	--446 	I  D$5$5$=$==!4!44)2): X$//33C;hz>RR8@8LI005GO (HI//
;B?D	 '__. 	>MC~~!!#q)HZ,@@&.z&:s#+3H+=##C(	>r+   c                    t         j                  |      }|D ]  }| j                  j                  |d      \  }}|^|j                  |j	                         k7  r?|j
                  }|j                  |       | j                  t        j                  ||        y t         j                  ||j                  || j                        \  }}||f| j                  |<   |j
                  dkD  s| j                  t        j                  |        y )Nr   r   r   )rT   r~   r   r   rU   r\   rf   r   r   r   r   rW   r   r   )	rP   rX   rg   r   rd   r   r|   r   r   s	            r,   _trackzMemTracker._track)  s     //2 	>B{{r<8HE1 ::*','9'9$--b1%%#((%BR &  *77'4+@+@t $)$-B%%)%%koou=#	>r+   r_   c                     |dk(  rt        | j                        S |dk(  rt        | j                        S t        d|       )aE  
        Capture a snapshot of the memory usage breakdown per device, based on the specified type.

        Args:
            type (str): The type of snapshot to capture. Can be "current" for the current memory usage or "peak" for the
                        peak memory usage. Defaults to "current".
        Returns:
            Dict[torch.device, Dict[str, int]]: A dictionary where each key is a torch.device, and each value is another
                                                dictionary. This inner dictionary has keys representing memory reference
                                                types as defined in ``_MemRefType`` and values representing the amount of
                                                memory consumed in bytes.
        Raises:
            ValueError: If an invalid type is specified.
        currentpeakzInvalid type )r   r   r   r   )rP   r_   s     r,   get_tracker_snapshotzMemTracker.get_tracker_snapshotA  sG    " 9D//00V^D//00}TF344r+   moduleinstall_grad_hooksc                    
 dt         j                  dd f fd
d}|j                         D ]  } j                  |t        j
                        }|t        d |D              z  }|j                  * j                  |j                  t        j                          j                  j                  |d       |s|j                  
      }|j                  
fd      }||f j                  |<    d}|j                         D ]7  }	 j                  |	t        j                        }|t        d |D              z  }9 ||fS )NgradrY   c                 F    j                  | t        j                         y rM   )r   r0   r:   )r  rP   s    r,   
_grad_hookz?MemTracker._track_module_params_and_buffers.<locals>._grad_hooka  s    00  r+   r   c              3   4   K   | ]  }|j                     y wrM   r\   r   r   s     r,   r   z>MemTracker._track_module_params_and_buffers.<locals>.<genexpr>m  s     Gu 2 2G   c                 (     | j                         S rM   )r  )pr  s    r,   <lambda>z=MemTracker._track_module_params_and_buffers.<locals>.<lambda>y  s    z!&&1 r+   c              3   4   K   | ]  }|j                     y wrM   r  r  s     r,   r   z>MemTracker._track_module_params_and_buffers.<locals>.<genexpr>  s      H!3!3 Hr  )r   r   
parametersr   r0   r8   sumr  r:   r   r   register_hook"register_post_accumulate_grad_hookbuffersr9   )rP   r  r  param_memoryparamr   grad_hook_handlepost_acc_grad_hook_handlebuffer_memorybufferr  s   `         @r,    _track_module_params_and_buffersz+MemTracker._track_module_params_and_buffersY  sP   	U\\ 	d 	 &&( 	E99!!F CGGGGLzz%44JJ$$
 0044UDAI&#(#6#6z#B ,1,T,T2-) %-;007'	. nn& 	IF99""F S H HHHM	I m,,r+   argsc                 v     ddt         j                  dd f fd}t        t         j                  ||       S )Nr   rg   rY   c                     t         j                  |       }|D ]3  }j                  j                  |d      \  }}|%|j                  z  5 y )Nr   )rT   r~   r   r   r\   )rg   r   rd   r   r|   input_or_output_memoryrP   s        r,   add_inps_or_outsz=MemTracker._track_inputs_or_outputs.<locals>.add_inps_or_outs  sS    33A6C A;;??2|<q$*e.@.@@*Ar+   )r   r   r   )rP   r+  r/  r.  s   `  @r,   _track_inputs_or_outputsz#MemTracker._track_inputs_or_outputs  s<    !"	A 	A 	A 	ell$4d;%%r+   inputsc                 l   | j                   j                  |      }|J || j                  vrgt        |      }| j	                  |d      \  }}| j                  |      }||_        ||_        ||_        || j                  |<   t        j                  }n| j                   j                  rM| j                  |   }t        j                  }| j                  t        j                  |      | _        d| _        nwt#        | j                   j$                        |hz
  }	t'        |	      dk(  rd|	v rt)        d      | j                  |   }t        j                  }| j                  |      }||_        | j+                         }
|t        j                  k(  ri|
j-                         D ci c]  \  }}||t.            c}}|_        |j2                  j5                  t        j6                  g       j9                  |
       |j2                  j5                  |g       j9                  t;        |
             y c c}}w )NTr  r    GlobalzMemTracker does not support memory tracking for multiple iterative calls. Either use ``reset_mod_stats`` to clear module memory stats for the previous iteration or file a github issue if you need this feature.)r   get_known_fqnr   rJ   r*  r0  parameter_mem
buffer_mem	input_memr@   rA   is_bwrE   r   r   r   r   rm   r  rn   NotImplementedErrorr  r   r   rN   rO   r   rC   r   r   )rP   r  r1  mod_namer	  	param_memr7  r8  r   r  mem_snapshotr   r   s                r,   _pre_fw_hookzMemTracker._pre_fw_hook  s    $$226:###---$X.I$($I$I4 %J %!Iz 55f=I&/I##-I "+I+4D  ($$E$$,,V4I''E||#&{{62"$++334zAG7|q X%8)H 
 ,,V4I$$E55f=I"+I002I$$$?K?Q?Q?S$.;c8Xj))$I  **9+<+<bAHHV&&ub188,9OP	$s   %H0outputsc                    | j                   |   }| j                  j                  r=t        j                  }| j
                  I| j                         |u r7d | _        d| _        n(t        j                  }| j                  |      }||_	        |j                  j                  |g       j                  | j                                y r   )r   r   r9  r@   rF   r   r   rB   r0  
output_memrO   r   r   r  )rP   r  r1  r?  r	  r   rA  s          r,   _post_fw_hookzMemTracker._post_fw_hook  s     ((0	""((E||'DLLNf,D##%%E66w?J#-I &&ub1889R9R9TUr+   c                    |t        j                  dd       y | j                  |   }| j                         }|j	                         D ci c]  \  }}||t
            c}}|_        |j                  j                  t        j                  g       j                  |       |j                  j                  t        j                  g       j                  t        |             y c c}}w )Nz%Module is None. Skipping PRE_BW hook.rj   rl   )rt   ru   r   r  r   r   rN   rO   r   r@   rH   r   rD   r   )rP   r  r+  r	  r=  r   r   s          r,   _pre_bw_hookzMemTracker._pre_bw_hook  s     >MMAaP((0	002;G;M;M;O 
*7#xC*%% 
	 	&&y'8'8"=DD\R&&y'7'7<CC\"	
	 
s   C&c                     |t        j                  dd       y | j                  |   }|j                  j	                  t
        j                  g       j                  | j                                y )Nz&Module is None. Skipping POST_BW hook.rj   rD  )	rt   ru   r   rO   r   r@   rG   r   r  )rP   r  r+  r	  s       r,   _post_bw_hookzMemTracker._post_bw_hook  s^    
 >MMBqQ((0	&&y'8'8"=DD%%'	
r+   	optimizerc                     |j                   j                         D ]D  }|j                         D ]/  }t        |t        j                        s| j                  ||       1 F y rM   )r   r   
isinstancer   r   r   )rP   rX   rH  statesvals        r,   _track_optimizer_statesz"MemTracker._track_optimizer_states  sW      oo,,. 	F}} c5<<088	r+   c                      dt         j                  dt        dt        dd f fd}dt         j                  dt        dt        dd f fd}t        |      t	        |      f _        y )NrH  r+  kwargsrY   c                     d_         y )NT)r   rH  r+  rO  rP   s      r,   _opt_step_pre_hookzFMemTracker._register_global_optimizer_hook.<locals>._opt_step_pre_hook  s      DLr+   c                 T    j                  t        j                  |        d_        y r   )rM  r0   r=   r   rQ  s      r,   _opt_step_post_hookzGMemTracker._register_global_optimizer_hook.<locals>._opt_step_post_hook  s      (()D DLr+   )r   	Optimizerr   r   r   r   )rP   rR  rT  s   `  r,   _register_global_optimizer_hookz*MemTracker._register_global_optimizer_hook  ss    	 	 .1	 ;>	 	 
	!	!.1	!;>	!	! --?@-.AB(
$r+   c                 &   | j                   j                         D ]%  \  }}|j                          |j                          ' | j                   j                          | j                  )| j                  D ]  }|j                           d | _        y y rM   )r   r   removeclearr   )rP   r&  r'  handles       r,   %_deregister_param_and_optimizer_hooksz0MemTracker._deregister_param_and_optimizer_hooks   s     --446	/ 
%##%%,,.	/ 	((..0''366   +/D( 4r+   externalc                    t        |      \  }}|D ]  }t        |t        j                        r!| j	                  |t
        j                         >t        |t        j                  j                        r| j                  |d       vt        |t        j                        r!| j                  t
        j                  |       t        dt        |       d       y)a  
        Track tensors and stateful objects like modules, optimizers etc. that are created outside the MemTracker.

        This method should be called before the ``MemTracker`` is used. Any tensors that are not module parameters, buffers,
        gradients activations, or optimizer states will be categorized as ``Other``. If you want them categorized with a
        custom name, please file a GitHub issue. Any tensors created outside the MemTracker and not supplied to this
        method will not be be tracked by ``MemTracker``.

        Args:
            *external (Union[nn.Module, optim.Optimizer, torch.Tensor]): The external modules, optimizers, and
                                                                         tensors to be tracked.
        Fr3  zObject of type zj is not supported for tracking. Only stateful objects like modules, optimizers, and tensors are supported.N)r   rJ  r   r   r   r0   r>   r   Moduler*  r   rU  rM  r=   	TypeErrorr_   )rP   r\  flat_externalr|   rz   s        r,   track_externalzMemTracker.track_external.  s     (1q  	C#u||,44OO C155ce5TC1,,[__cB%d3i[ 1a b 	r+   r   r   c                 \    | j                  |      }|rt        ||       yt        ||       y)a'  
        Display the memory usage breakdown snapshot of the tracker based on the specified type and units.

        Keyword args:
            type (str): The type of snapshot to display. Can be "current" for the current memory usage or "peak" for the
                        peak memory usage. Defaults to "current".
            units (str): The units to use for displaying memory usage. Defaults to "B". Supports ["B", "KiB", "MiB", "GiB"].
            tabulate (bool): Whether to display the snapshot in a tabular format. Defaults to False.
        N)r  r   r   )rP   r_   r   r   r   s        r,   display_snapshotzMemTracker.display_snapshotN  s+     ,,T2#He4He,r+   depthc                 n   dt         dt        t        t        t         f      fdt	        | j
                  j                         fd      D ]e  }|j                  }|j                  d      dz   }||kD  r)t        d|        |rt        |j                  |       Pt        |j                  |       g y	)
aZ  
        Print per device memory breakdown snapshot for each module called within MemTracker.

        Snapshots are displayed for the states defined by ``_ModState``.
        The module hierarchy is displayed up to the specified depth.

        Keyword Args:
            depth (int, optional): The depth of the module hierarchy to display. Defaults to 2.
            units (str, optional): The units to use for memory tracking. Defaults to "B". Supports ["B", "KiB", "MiB", "GiB"].
            tabulate (bool, optional): Whether to display the snapshot in a tabular format. Defaults to False.
        srY   c                     t        j                  d|       D cg c]-  }|j                         rt        |      n|j	                         / c}S c c}w )Nz([0-9]+))resplitisdigitr   lower)rf  texts     r,   natural_sort_keyzAMemTracker.display_modulewise_snapshots.<locals>.natural_sort_keyo  sF     HHZ3 "\\^D	=  s   2Ac                 (     | j                         S rM   )rK   )m_statsrm  s    r,   r  z9MemTracker.display_modulewise_snapshots.<locals>.<lambda>w  s     0 A r+   )r   .r    z	Module:  N)rR   r   r   r   sortedr   r   rK   countr   r   rO   r   )rP   rd  r   r   r	  rK   	mod_depthrm  s          @r,   display_modulewise_snapshotsz'MemTracker.display_modulewise_snapshots`  s    	 	U38_(= 	    '')A
 	CI  ''Gc*Q.I5 IgY'(.y/B/BEJ&y':':EB	Cr+   c                 8    | j                   j                          y)z[
        Reset all the module memory stats. Clears ``memory_tracking`` dictionary.
        N)r   rY  r   s    r,   reset_mod_statszMemTracker.reset_mod_stats  s     	""$r+   c                    | j                          | j                  j                  | j                  | j                  | j
                  | j                         | j                          | j                         | _	        | j                  j                         D ci c]  \  }}||t            c}}| _        | j                  j                          t        | 5          | S c c}}w rM   )rV  r   register_user_hooksr>  rB  rE  rG  r  r  r   r   r   r   	__enter__super)rP   r   r   	__class__s      r,   ry  zMemTracker.__enter__  s    ,,.--		
 	"779;?;N;N;T;T;V
*7#xC*%%
 	##%
s   C!c                     | j                          | j                  j                          | j                          t	        |   |   | j                  j
                  |  y rM   )r[  r   clear_user_hooksr  rz  __exit__)rP   r+  r{  s     r,   r~  zMemTracker.__exit__  sP    224**,$"""D)r+   c                     ||i |xs i }| j                   rt        j                  }nC| j                  j                  r| j
                  st        j                  }nt        j                  }t        t        j                  t        | j                  |      |       | j                  j                  rt        j                  nt        j                  }| j!                  |       |S rM   )r   r0   r=   r   r9  r   r<   r;   r   r   r   r   r  r@   rH   rC   r
  )rP   functypesr+  rO  resrX   r  s           r,   __torch_dispatch__zMemTracker.__torch_dispatch__  s    D)FLb) <<!ooG$$T[[!&&G!ooGellGDKK$A3G*.*;*;*A*AY&&yGXGX

+
r+   )rY   Nr   )F)r  )T)r  r   F)rj   r   F)rY   r"   )r*   N)4r&   r'   r(   r)   rQ   r   rT   r   r   r$   r   r   r   boolr   r   r   r   r   r  r  r.   r
  r  rR   r
   rW   r  r   r^  r   r*  r   r0  r>  rB  rE  rG  r   rU  rM  rV  r[  r   ra  rc  rt  rv  ry  r~  r  __classcell__)r{  s   @r,   r"   r"   d  s   .`92 +/*.*6*6 *6 #3-	*6
 h'*6 
*6` !&	!<<! ! 	!
 
\	!F6l 6'++ 6$ 6/ 9>V > >.>h >5<< >D >2 $55	ellDcN*	+52 =A--ii--59--	sCx--^&S &S &5Q299 5Qc 5Qd 5QnVBII Vs VS VT V&
299 
C 
D 
&
BII 
S 
T 
		,1OO			
(0ryy%//5<<GH	B IN--,/-AE-	-& BG!C!C%(!C:>!C	!CF%"*c *d *r+   )Cr`   osrh  rt   copyr   enumr   r   	functoolsr   r   typingr   r	   r
   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   $torch.distributed._tools.mod_trackerr   torch.optim.optimizerr   r   torch.utils._python_dispatchr   r   torch.utils._pytreer   r   torch.utils.weakr   r   torch.utils.hooksr   r   environr   rb   r   __all__rR   r$   r.   r0   r@   rJ   rT   r   floatr   rW   r   r   r   r   r   r"   r*   r+   r,   <module>r     s&    	 	    $   #   ; < 9 1
 

?CDIDq  
.tsD tFS$ F( , 0W W<i iX
C 
C 
H Hc Hc HeE3J>O H
d5<<c3h#?@ 
 
QU 
$A5<<c3h/0A9<A	A4FDellDcN&B!CDDENQ	@FDellDcN&B!CDDE@NQ@	@@
$ 
K	" K	r+   