
    sg?                        d dl Z d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
mZmZ g dZ e j                  e      Z	 dZ G d d      Z G d	 d
e      Z e ej(                  d      d       Zd Z G d d      Z G d d      Zd Z	 	 ddeedf   deeeef      dedeeedf      deeeef      deee   ee   f   fdZdee   fdZy)    N)AnyDictListOptionalTuplemap_aggregate)tree_flattentree_unflatten)TensorChunkSpecsplit_args_kwargs_into_chunksmerge_chunksFc                       e Zd ZdZd Zy)_CustomReducera$  
    Custom reducer class that can be used to specify a custom operation that
    reduces losses of multiple microbatches into one value.

    Example:
    >>> # xdoctest: +SKIP
    >>> sum_reducer = _CustomReducer(
    >>>     torch.tensor(0.0),
    >>>     lambda a, b: a + b
    >>> )
    c                      || _         || _        y N)
init_value	reduce_fn)selfr   r   s      Z/var/www/html/venv/lib/python3.12/site-packages/torch/distributed/pipelining/microbatch.py__init__z_CustomReducer.__init__(   s    $"    N)__name__
__module____qualname____doc__r    r   r   r   r      s    
#r   r   c                       e Zd Zy)_LossReducerNr   r   r   r   r   r   r   r   -       r   r   g        c                     | |z   S r   r   )abs     r   <lambda>r%   1   s
    1q5 r   c                   n    e Zd ZU dZd Zeed<   d Zd Ze	de
edf   fd       Ze	deeef   fd	       Zy
)r   z2
    Class used to specify chunking of inputs
    c                     || _         y r   	split_dim)r   r)   s     r   r   zTensorChunkSpec.__init__=   s	    "r   r)   c                 |    | j                   j                   d| j                   j                   d| j                   dS )N.())	__class__r   r   r)   r   s    r   __repr__zTensorChunkSpec.__repr__B   s9    ~~(()4>>+B+B*C1T^^DTTUV	
r   c                 "    d| j                    dS )NzTensorChunkSpec(r-   r(   r/   s    r   __str__zTensorChunkSpec.__str__G   s    !$..!133r   
chunk_dims.c                      t        | d       }|S )a  
        A helper for creating a tuple of `TensorChunkSpec` from a tuple of chunk
        dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # There are three positional arguments to the model, and
            >>> # we are chunking them along dimension 0, 0 and 1, respectively
            >>> args_chunk_spec = TensorChunkSpec.from_tuple((0, 0, 1))
        c                     t        |       S r   r   dims    r   r%   z,TensorChunkSpec.from_tuple.<locals>.<lambda>Y       , r   r   )r3   args_chunk_specs     r   
from_tuplezTensorChunkSpec.from_tupleJ   s     (,
 r   c                      t        | d       }|S )a\  
        A helper for creating a dictionary of `TensorChunkSpec` from a
        dictionary of chunk dimensions (int's).
        Example:
            >>> # xdoctest: +SKIP
            >>> # Chunk dimension 0 for the "id" argument, 1 for the "mask" argument
            >>> kwargs_chunk_spec = TensorChunkSpec.from_dict({"id": 0, "mask": 1})
        c                     t        |       S r   r6   r7   s    r   r%   z+TensorChunkSpec.from_dict.<locals>.<lambda>k   r9   r   r   )r3   kwargs_chunk_specs     r   	from_dictzTensorChunkSpec.from_dict]   s     *,
 ! r   N)r   r   r   r   r   int__annotations__r0   r2   staticmethodr   r;   r   strr?   r   r   r   r   r   8   se    # N

4 #s(O $ !cN! !r   r   c                       e Zd Zy)
_ReplicateNr    r   r   r   rE   rE   q   r!   r   rE   c                     i }g }|}d}t        |       t        |      k(  s;J dt        | j                                dt        |j                                       | j                         D ]G  \  }}t	        |      \  }	}
|j                  |
       ||   }|J t	        |      \  }}t        |	      t        |      k7  rt        d| d|       g }t        |	|      D ]  \  }}|t        u st        |t        j                        s|j                  |g|z         ?t        |t              rqt        |t        j                        s
J | d       |j                  |j                        }||k  r9|r"t        j!                  d| d	| d
| d       |}nt#        d| d| d| d      t        j$                  |||j                        }t&        rg }d}|D ]  }t        j(                  |      }||j                  |j                        z   }t+        ddd      g|j,                  z  }t+        ||      ||j                  <   |||<   |j                  |       ||j                  |j                        z  } |j                  |       n|j                  |       d}t/        d|        |||<   J g }t1        |      D ]O  }i }|j                         D ]'  \  }}g }|D ]  }|j                  ||           |||<   ) |j                  |       Q g }|D ]b  } i }!t        |      t        |       k(  sJ t        | j                         |      D ]  \  \  }}}"t3        ||"      |!|<    |j                  |!       d |S )aW  
    Given a dictionary of args, and a dictionary of chunking specs, shard the
    args according to the chunking specs.

    Args:
        args_dict: Dictionary of args
        args_chunk_spec: Dictionary of chunking specs
        num_chunks: Number of chunks to shard the args into

    Returns:
        args_split: List of sharded args
    Tzargs_dict.keys() = z args_chunk_spec.keys() = NzArgument value z9 did not have the same number of values as as chunk spec z is not a tensorz%Tensor size on chunking dimension is z', downsizing the number of chunks from z to r+   zArg z% on chunking dimension has a size of z$, smaller than the number of chunks z. PiPPy cannot reduce the number of chunks because other arguments have bigger chunk-dimension sizes. Please adjust your num_chunks setting.r   FzUnrecognized chunk spec: )lenlistkeysitemsr
   append
ValueErrorziprE   
isinstancetorchTensorr   sizer)   loggerwarningRuntimeErrortensor_split_debug_mask_minibatches
zeros_likeslicendim	TypeErrorranger   )#	args_dictr:   
num_chunksargs_sharded_replicated	arg_specsreal_num_chunksfirst_tensorarg_keyargflatspec
chunk_specchunk_spec_flat_sharded_arg_flatvchunk_vv_split_dim_sizechunk_tensorsexpanded_chunkssplit_dim_idxchunk_tensornew_val	upper_idxslice_indiceschunks_flat	chunk_idx
chunk_argskeyarg_single_chunkv_flat
args_splitchunkper_chunk_argsarg_specs#                                      r   _shard_dict_of_argsr~   u   s#   ( !I OLy>S  n	T).."2344NtTcThThTjOkNlmn  ") I<!#&
d$W-
%%%)*5t9O,,!# '++5,8 
 dO4 8	GJAw*$Jq%,,,G ''o(=>G_5 "!U\\2Jqc9I4JJ2#$66'*;*;#< #o5# CDTCU VDDN<tTdSeefh +;*"7)+PQaPb cAAK MEE  !& 2 2(9(9! +&(O$%M(5 N"'"2"21"5$1L4E4EgFWFW4X$X	).tT4)@(AGLL(P;@)9<g&7&78 2>.'..w7%):):7;L;L)MMN %++O<$++M:$";G9 EFFq8	Gt ,<(SI<X K?+ '	
/557 	/HC! ; ''y(9:;.JsO		/
 	:&' J *9~U+++$'y$A 	@ JS#"0h"?N3	@.)* r   args.kwargschunksr:   r>   returnc                   	 |i }|t        t              ft        |       z  }|#t        j	                  |t        t                    }t        t        t        |             t        t        |            |      }t        |      }t        |||      }t        |      |k  r<t        |      }t        t        t        |             t        t        |            |      }t        |      t        |      k7  r#t        dt        |       dt        |             g }|D ]7  	|j                  t        	fdt        t        	            D                     9 ||fS )a  
    Given a sequence of args and kwargs, split them into a number of chunks
    according to  their respective chunking specs.

    Args:
        args: Tuple of args
        kwargs: Dict of kwargs
        chunks: Number of chunks to split the args and kwargs into
        args_chunk_spec: chunking specs for args, in same shape as args
        kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

    Returns:
        args_split: List of sharded args
        kwargs_split: List of sharded kwargs
    z;args and kwargs are split into different number of chunks: z, c              3   (   K   | ]	  }|     y wr   r   ).0irv   s     r   	<genexpr>z0split_args_kwargs_into_chunks.<locals>.<genexpr>Y  s     N!
1Ns   )r   DEFAULT_CHUNK_DIMrG   dictfromkeysr~   	enumeraterT   rK   tupler[   )
r   r   r   r:   r>   args_split_dictr`   kwargs_splitrz   rv   s
            @r   r   r      sY   p ~ *+<=?#d)K  MM&/BS2TU)Yt_Y'(O
 /*O&L <?* l+-4!?+,
 ?s<00I?#$Bs<'8&9;
 	

 J% P
%NuS_7MNNOP |##r   c                    |t        |      \  }}n-t        | d         \  }}t        t              gt        |      z  }g }| D ]I  }t        |      \  }}t        |      t        |      k7  rt	        d| d|       |j                  |       K g }	t        |      D ]  \  }
}t        |t              rft        t        |            D cg c]
  }||   |
    }}t        r|d   j                  }|dd D ]  }|j                  |k(  rJ  t        j                  t        j                  |ddit        |      |j                        }g }d}t        |      t        |      k(  sJ t        ||      D ]o  \  }}||j!                  |j                        z   }t#        ddd      g|j$                  z  }t#        ||      ||j                  <   ||   }|j                  |       |}q n|}|	j                  t        j&                  ||j                  	             ~t        |t(              rP|j*                  }t        t        |            D ]  }|j-                  |||   |
         } |	j                  |       |d   |
   }t        dt        |            D ]  }||   |
   |k(  rJ  |	j                  |         t/        |	|      S c c}w )
z
    Given a list of chunks, merge them into a single value according to
    the chunk spec.

    Args:
        chunks: list of chunks
        chunk_spec: Chunking spec for the chunks

    Returns:
        value: Merged value
    Nr   zChunk z did not match chunk spec    devicemeta)sectionsr8   r7   )r
   r   r   rG   rL   rK   r   rN   r[   rV   shaperO   rU   emptyr)   rM   rQ   rX   rY   catr   r   r   r   )r   rf   spec_flattenedflatten_specchunk0_flatchunks_flattenedr{   chunk_flattenedrh   args_flattenedarg_idxrc   ru   partial_valuesoverall_shapevalmeta_chunksvalues_to_catchunk_start_idxpartial_value
meta_chunkchunk_end_idxrs   slicedreduced_valvalues                             r   r   r   ^  s   Z '3J'?$ %1$;!\)*;<=K@PP  1)%03~#66veW,FzlSTT01 N!.1 0)c?+ "'s+;'<!= !+G4N 
 ' .q 1 7 7)!"- 6C995556#00KK>v> 0 !#"#>*c+.>>>>14^[1Q 4-M:$3joocmm6T$TM%*4t%<$=@R@R$RM38-3XM#--0*=9F!((0&3O4 !/!!%))Ms}}"MN^,..K"3'7#89 	!mm!1)!<W!E
 !!+.$Q'0E"1c*:&;< E	'	27;uDDDE!!%(a0)f .,77cs   
K	)NN)loggingtypingr   r   r   r   r   rO   torch.fx.noder	   torch.utils._pytreer
   r   __all__	getLoggerr   rR   rV   r   r   tensorsum_reducerr   r   rE   r~   rC   r@   r   r   r   r   r   <module>r      s;    3 3  ' < 
		8	$
   # #$	> 	 <5<<,.@A  5! 5!r	 	~J >B>Be$
S/e$T#s(^$e$ e$ eOS$89:	e$
  S/%9 :;e$ 4;T
"#e$Pw8Iw8r   