
    sgd                     f   U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlm Z  d dl!m"Z"m#Z# d d	l$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z; d dl<m=Z= g dZ>dZ?e@eAd<   e G d d             ZBe G d d             ZCdZDde@fdZE G d de      ZF G d deF      ZG G d deF      ZHd e2deIfd!ZJd"eId#ee2   deee2      fd$ZKd%ej                  d&eej                  ej@                  f   d'e2d(e@de9f
d)ZNd*ed+ej                  d,ej                  d-e1d.eId/ePd0eIddfd1ZQ G d2 d3e      ZR G d4 d5eR      ZS G d6 d7e8      ZT G d8 d9e7      ZU G d: d;eTe5      ZVy)<    N)ABCabstractmethod)contextmanager)	dataclass)Path)AnyCallablecastDict	GeneratorIOIterableIteratorListOptionalTupleUnion)Tensor)_get_available_device_type_get_device_module)narrow_tensor_by_index)MetadataMetadataIndexSTATE_DICT_TYPEStorageMeta)LoadItemTypeLoadPlanLoadPlannerReadItemSavePlanSavePlanner	WriteItemWriteItemType)BlockingAsyncStager)StorageReaderStorageWriterWriteResult)_create_file_view)Future)FileSystemWriterFileSystemReader
FileSystemFileSystemBase	.metadata_metadata_fnc                   0    e Zd ZU dZeed<   eed<   eed<   y)_StorageInfoz#This is the per entry storage info.relative_pathoffsetlengthN)__name__
__module____qualname____doc__str__annotations__int     Z/var/www/html/venv/lib/python3.12/site-packages/torch/distributed/checkpoint/filesystem.pyr1   r1   B   s    -KKr=   r1   c                       e Zd ZU eed<   y)_StoragePrefixprefixN)r5   r6   r7   r9   r:   r<   r=   r>   r@   r@   K   s    Kr=   r@   z.distcpreturnc                  <    t        t        j                               S N)r9   uuiduuid4r<   r=   r>   _generate_uuidrG   S   s    tzz|r=   c                   v    e Zd Zedededdfd       Zedd       Zedee	e
j                  ef      fd       Zy)	_TensorLoadersizeobjrB   Nc                      y rD   r<   selfrJ   rK   s      r>   addz_TensorLoader.addX       r=   c                      y rD   r<   rN   s    r>   start_loadingz_TensorLoader.start_loading\   rP   r=   c                      y rD   r<   rR   s    r>   valuesz_TensorLoader.values`   rP   r=   rB   N)r5   r6   r7   r   r;   objectrO   rS   r   r   torchr   rU   r<   r=   r>   rI   rI   W   sk     & T     u||V';!<=  r=   rI   c                   h    e Zd ZdeddfdZdededdfdZd
dZde	e
ej                  ef      fd	Zy)_SerialCpuLoaderresolve_funrB   Nc                      || _         g | _        y rD   )r[   items)rN   r[   s     r>   __init__z_SerialCpuLoader.__init__f   s    &/1
r=   rJ   rK   c                 >    | j                   j                  ||f       y rD   )r]   appendrM   s      r>   rO   z_SerialCpuLoader.addj   s    

4+&r=   c                      y rD   r<   rR   s    r>   rS   z_SerialCpuLoader.start_loadingm       r=   c              #     K   | j                   D ]y  \  }}| j                  |      j                         }|j                         }|j	                         j                         |j                         k7  r|j                         }||f { y wrD   )r]   r[   detachcpustoragerJ   numelclonerN   _rK   tensors       r>   rU   z_SerialCpuLoader.valuesp   sz     jj 	FAs%%c*113FZZ\F~~$$&&,,.8 	s   B
BrV   )r5   r6   r7   r	   r^   r;   rW   rO   rS   r   r   rX   r   rU   r<   r=   r>   rZ   rZ   e   sS    2H 2 2' '& 'T '	u||V';!<= 	r=   rZ   c            	          e Zd Z	 	 ddedeej                     deddfdZe	de
fd       Zdeeej                  ef      fdZdd	Zdeeej                  ef      fd
ZdededdfdZddZdeeej                  ef      fdZy)_OverlappingCpuLoaderNr[   streaminflight_threshholdrB   c                 8   || _         g | _        || _        d| _        t	        j
                         | _        d| _        d| _        |r|j                  n	t               | _	        t        | j                        | _        t        t        j                  j                   |xs | j                  j#                               | _        | j$                  | j                  j#                         k7  r4| j$                  j'                  | j                  j#                                y y )Nr   F)r[   r]   ro   in_flight_datacollectionsdequecurrent_itemsidxstarteddevice_typer   r   device_moduler
   rX   cudaStreamcurrent_streamrn   wait_stream)rN   r[   rn   ro   s       r>   r^   z_OverlappingCpuLoader.__init__}   s     '/1
#6 0;0A0A0C"(F.H.J 	 00@0@AJJvL););)J)J)L
 ;;$,,;;==KK##D$6$6$E$E$GH >r=   c                 F    | j                   t        | j                        k\  S rD   )ru   lenr]   rR   s    r>   _donez_OverlappingCpuLoader._done   s    xx3tzz?**r=   c                    g }| j                   | j                  k\  r| j                  j                          | j                   | j                  k\  r| j                  j                         }| xj                   |d   j                         |d   j                         z  z  c_         |j                  |       | j                   | j                  k\  r|S Nr   )	rq   ro   rn   synchronizert   popleftrg   element_sizer`   )rN   drainedvals      r>   _drainz_OverlappingCpuLoader._drain   s    $":"::KK##%!!T%=%==$$,,.C3q6<<>CF4G4G4I#IINN3 !!T%=%== r=   c                    | j                   j                  | j                        5  | j                  s| j                  | j                  k  rm| j
                  | j                     \  }}| xj                  dz  c_        | j                  |      j                         }|j                  j                  | j                  k(  r|j                  dd      }nn|j                  t        j                  d      k(  rL|j                         j                         |j!                         |j"                  z  k7  r|j%                         }| j&                  j)                  ||f       | xj                  |j!                         |j+                         z  z  c_        | j                  s| j                  | j                  k  rmd d d        y # 1 sw Y   y xY w)N   re   T)devicenon_blocking)rx   rn   r   rq   ro   r]   ru   r[   rd   r   typerw   torX   untyped_storagerJ   rg   itemsizerh   rt   r`   r   ri   s       r>   _refillz_OverlappingCpuLoader._refill   sc   &&t{{3 	NjjT%8%84;S;S%SDHH-3A))#.557==%%)9)99#YYe$YGF]]ell5&99..0557!<<>FOO;< "("")) ##v||~8K8K8M'MM#) jjT%8%84;S;S%S	N 	N 	Ns   FGGc                     | j                   sJ t        | j                        dkD  r| j                  j	                          | j                  S r   )r   r~   rt   rn   r   rR   s    r>   _finishz_OverlappingCpuLoader._finish   s=    zzzt!!"Q&KK##%!!!r=   rJ   rK   c                 l    | j                   rt        d      | j                  j                  ||f       y )Nz&cannot add items after loading started)rv   RuntimeErrorr]   r`   rM   s      r>   rO   z_OverlappingCpuLoader.add   s+    <<GHH

4+&r=   c                     | j                   ry d| _         | j                  j                  t        j                  d             | j                          y )NTr   key)rv   r]   sortoperator
itemgetterr   rR   s    r>   rS   z#_OverlappingCpuLoader.start_loading   s9    <<

H//23r=   c              #      K   | j                          | j                  s7| j                         }| j                          |E d {    | j                  s7| j	                         E d {    y 7 *7 wrD   )rS   r   r   r   r   )rN   r   s     r>   rU   z_OverlappingCpuLoader.values   sY     **kkmGLLN **
 <<>!! !s*   AA3A/A3A3)A1*A31A3)Ni@B rV   )r5   r6   r7   r	   r   rX   rz   r;   r^   propertyboolr   r   r   r   rW   r   r   r   r   rO   rS   r   rU   r<   r=   r>   rm   rm   |   s     *.#,	II &I !	I
 
I. +t + +U5<<#789 N0"%f(<"=> "' '& 'T '
"u||V';!<= "r=   rm   itemc                     d}| j                   J | j                   j                  D ]  }||z  }	 | j                   j                  j                  }|t        j
                  j                  |      z  S Nr   )tensor_datarJ   
propertiesdtyperX   _utils_element_size)r   rJ   sr   s       r>   
_item_sizer      sp    D'''"" 	 ''--E%,,,,U333r=   binsr]   c                    | dk(  r|gS |D cg c]"  }|j                   t        j                  k(  s!|$ }}|D cg c]"  }|j                   t        j                  k7  s!|$ }}t        |       D cg c]  }g  }}t        |       D cg c]  }d }}|j	                  t
        d       t        |      D ]  \  }}||| z     j                  |        |D ]X  }t        t        |      t        j                  d            d   }	||	   j                  |       ||	xx   t        |      z  cc<   Z |S c c}w c c}w c c}w c c}w )Nr   r   T)r   reverser   )r   r#   BYTE_IOranger   r   	enumerater`   minr   r   )
r   r]   wibytes_wtensor_wrj   bucketsbucket_sizesiru   s
             r>   _split_by_size_and_typer      s3   qyw!FbRWW0E0E%ErFGF"Grbgg1F1F&FGHG27+%>Qb%>G%>$T{+!A+L+MMj$M/7# %2D  $%  ,)L)x/B/B1/EFqIBSZ^+	, N# GG%>+s!   "D3D3"D8D80	D=	Ern   data
write_itemstorage_keyc           	         | j                         }|j                  t        j                  k(  r<t	        |t
        j                        sJ | j                  |j                                nkt	        |t        j                        sJ |j                  t        j                  d      k(  sJ t        j                  |t        t        t           |              | j                         |z
  }t!        |j"                  |t%        |||            S )Nre   )indexsize_in_bytesstorage_data)tellr   r#   r   
isinstanceioBytesIOwrite	getbufferrX   r   r   saver
   r   bytesr'   r   r1   )rn   r   r   r   r3   r4   s         r>   _write_itemr      s     [[]F-///$

+++T^^%&$---{{ell51111

4bi01[[]V#F!+vv> r=   create_stream
file_queueresult_queueplannerro   	use_fsyncthread_countc           
         	 	 |j                         \  }}}	t        j                  j                         }
t	        t        |
d       }|dk(  rMt        j
                  j                         s|r-|j                         r|dkD  rt        |j                  |      }nt        |j                        }|	D cg c]"  }|j                  t        j                  k7  s!|$ }}|D ]  }|j                  t        |      |        |j                          |	D cg c]"  }|j                  t        j                  k(  s!|$ }}g } | |d      5 }|D ]0  }|j                  |      }|j!                  t#        ||||             2 |j%                         D ]0  \  }}|j&                  sJ |j!                  t#        ||||             2 |r$	 t)        j*                  |j-                                d d d        |j3                  |       c c}w c c}w # t.        $ r t)        j0                          Y Dw xY w# 1 sw Y   IxY w# t4        j6                  $ r Y y w xY w)Nr   r   )ro   wb)
get_nowaitrX   _C_get_privateuse1_backend_namegetattrry   is_availablerm   resolve_datarZ   r   r#   r   rO   r   rS   r`   r   rU   is_cpuosfsyncfilenoAttributeErrorsyncputqueueEmpty)r   r   r   r   ro   r   r   	file_namer   write_itemscustom_backend_namecustom_device_modloaderr   r   r   r   write_resultsrn   r   rk   s                        r>   _write_files_from_queuer     s,   82<2G2G2I/I{K #((("H"H"J '/BD I !JJ++-).?.L.L.N'!+.(((;
 *(( &1UrBGG}?T?T4TUHU& ?


:j1:>?  "$/Tb277m>S>S3SrTGTMy$/ "6") J"//
;D!((#FD*kJ +1--/ &FJ!==(=!((#FFJL "1"$ ]+k 6 V
 U& * "	"!" "& ;; sm   B/H7 1"G>G>8H7 "H3H7H7 A;H+ #H#%H7 H(%H+'H((H++H40H7 7IIc                   f   e Zd Zeedeeej                  f   dede	e
j                  ddf   fd              Zedeeej                  f   dedeeej                  f   fd       Zedeeej                  f   deeej                  f   ddfd	       Zedeeej                  f   deeej                  f   fd
       Zedeeej                  f   ddfd       Zeedeeej                  f   defd              Zedeeej                  f   defd       Zedeeej                  f   ddfd       Zy)r-   pathmoderB   Nc                      y rD   r<   )rN   r   r   s      r>   r   zFileSystemBase.create_streamV  s    
 	r=   suffixc                      y rD   r<   rN   r   r   s      r>   concat_pathzFileSystemBase.concat_path]       	r=   new_pathc                      y rD   r<   rN   r   r   s      r>   renamezFileSystemBase.renamec  r   r=   c                      y rD   r<   rN   r   s     r>   	init_pathzFileSystemBase.init_pathi      r=   c                      y rD   r<   r   s     r>   mkdirzFileSystemBase.mkdirm  r   r=   checkpoint_idc                      y rD   r<   clsr   s     r>   validate_checkpoint_idz%FileSystemBase.validate_checkpoint_idq  s     	r=   c                      y rD   r<   r   s     r>   existszFileSystemBase.existsv  r   r=   c                      y rD   r<   r   s     r>   rm_filezFileSystemBase.rm_filez  r   r=   )r5   r6   r7   r   r   r   r9   r   PathLiker   r   IOBaser   r   r   r   r   classmethodr   r   r   r   r<   r=   r>   r-   r-   U  s   #r{{*+36	299dD(	)  
 #r{{*+58	sBKK	  
 #r{{*+7<S"++=M7N	 
 eC$45 %R[[@P:Q   %R[[ 01 d   5bkk9I3J t    5bkk!12 t   E#r{{"23   r=   r-   c            
          e Zd Zedeeej                  f   dedee	j                  ddf   fd       Zdeeej                  f   dedeeej                  f   fdZdeeej                  f   deeej                  f   fdZdeeej                  f   d	eeej                  f   ddfd
Zdeeej                  f   ddfdZedeeej                  f   defd       Zdeeej                  f   defdZdeeej                  f   ddfdZy)r,   r   r   rB   Nc              #      K   t        t        |      j                  |      5 }t        t        j                  |       d d d        y # 1 sw Y   y xY wwrD   )r
   r   openr   r  )rN   r   r   rn   s       r>   r   zFileSystem.create_stream  sD      $""4( 	*Fryy&))	* 	* 	*s    AA	AAAr   c                 (    t        t        |      |z  S rD   )r
   r   r   s      r>   r   zFileSystem.concat_path  s     D$&((r=   c                 <    t        |t              st        |      }|S rD   )r   r   r   s     r>   r   zFileSystem.init_path  s    $%:Dr=   r   c                 ^    t        t        |      j                  t        t        |             y rD   )r
   r   r   r   s      r>   r   zFileSystem.rename  s      	T4T8 45r=   c                 F    t        t        |      j                  dd       y )NT)parentsexist_ok)r
   r   r   r   s     r>   r   zFileSystem.mkdir  s    T4td;r=   r   c                     t        |t              rydt        |      v ryt        |      j                  D ]B  }|j	                         st        j                  t        |      t
        j                        sB y y)NTz://F)r   r   r9   r
  r   r   accessW_OK)r   r   ps      r>   r   z!FileSystem.validate_checkpoint_id  s_    mT*C&&m$,, 	AxxzbiiA8	 r=   c                 >    t        t        |      j                         S rD   )r
   r   r   r   s     r>   r   zFileSystem.exists  s    D$&&((r=   c                 @    t        t        |      j                          y rD   )r
   r   unlinkr   s     r>   r   zFileSystem.rm_file  s    T4!r=   )r5   r6   r7   r   r   r9   r   r   r   r   r  r   r   r   r   r   r  r   r   r   r   r<   r=   r>   r,   r,     s~   *#r{{*+*36*	299dD(	)* *)#r{{*+)58)	sBKK	 )
eC$45 %R[[@P:Q 
6#r{{*+67<S"++=M7N6	6
<%R[[ 01 <d < 5bkk9I3J t  )5bkk!12 )t )"E#r{{"23 " "r=   r,   c                       e Zd ZdZ	 	 	 	 	 ddeeej                  f   dedede	de	dede
d	e
d
df fdZddeeej                  df   d
dfdZded
dfdZded
efdZdee   d
ee   fdZdeded
eee      fdZdedeee      d
dfdZd
ee   fdZed
eeej                  f   fd       Zed
eeej                  f   fd       Zedeeej                  f   d
efd       Z xZ S ) _FileSystemWritera  
    Basic implementation of StorageWriter using file IO.

    This implementation makes the following assumptions and simplifications:

    * The checkpoint path is an empty or non-existing directory.
    * File creation is atomic

    The checkpoint consist of one file per write request plus
    a `.metadata` file with the serialized metadata.

    r   single_file_per_rank
sync_filesr   per_thread_copy_ahead	overwriteargskwargsrB   Nc                     t         	|           t               | _        | j                  j	                  |      | _        || _        || _        || _        || _	        t               | _        || _        y)a  
        Initialize the writer pointing to `path`.

        Args:
            path: directory where the checkpoint will be written to.
            single_file_per_rank: Produce one file per rank instead of one file per tensor/blob. Default to True.
            sync_files : force files to be synced to permanent storage. Default to True.
            thread_count: Number of IO threads to use to write. Default to 1.
            per_thread_copy_ahead: How many bytes to copy from the GPU ahead of saving then. Default 10Mb.
            overwrite: Whether to allow overwriting existing checkpoints. Defaults to True.

        N. B. If sync_files is disabled, there's no guarantee that the checkpoint will be consistent in the case of a failure.
        N)superr^   r,   fsr   r   r  r  r   r  rG   save_idr  )
rN   r   r  r  r   r  r  r  r  	__class__s
            r>   r^   z_FileSystemWriter.__init__  s_    0 	,GG%%d+	$8!$(%:"%'"r=   r   c                 f    |r | j                   j                  |      | _        t               | _        y rD   )r  r   r   rG   r  rN   r   s     r>   resetz_FileSystemWriter.reset  s%    ))-8DI%'r=   is_coordinatorc                      y rD   r<   )rN   r$  s     r>   set_up_storage_writerz'_FileSystemWriter.set_up_storage_writer  rb   r=   planc                 H   | j                   j                  | j                         | j                   j                  | j                        rW| j
                  r2t        j                  d| j                   d| j
                  d       |S t        d| j
                  d      |S )Nz#Detected an existing checkpoint in z#, overwriting since self.overwrite=z. Past version 2.5 of PyTorch, `overwrite` will default to False. Set this variable to True to maintain this functionality or False to raise when an existing checkpoint is found.z-Checkpoint already exists and self.overwrite=.)	r  r   r   r   metadata_pathr  warningswarnr   rN   r'  s     r>   prepare_local_planz$_FileSystemWriter.prepare_local_plan  s    dii 77>>$,,-~~9$:L:L9MMqbfbpbpar sk k  #%SDNNCTTU#VWWr=   plansc                     t        |      D cg c])  \  }}t        j                  |t        d| d            + }}}|S c c}}w )N__rj   r   )r   dataclassesreplacer@   )rN   r/  r   r'  	new_planss        r>   prepare_global_planz%_FileSystemWriter.prepare_global_plan  sQ     %U+
4 >Bqc)3LM
	 
 	
s   .Ar   c                    |j                   dfd}t        j                         }| j                  rgt	        | j
                  |j                        D ]C  } |       }| j                  j                  | j                  |      }|j                  |||f       E nS|j                  D ]D  } |       }| j                  j                  | j                  |      }|j                  |||gf       F t        j                         }	g }
t        d| j
                        D ]w  }t        j                  t        | j                  j                  ||	|| j                   | j"                  | j
                  f      }|j%                          |
j'                  |       y t        | j                  j                  ||	|| j                   | j"                  | j
                         |
D ]  }|j)                           g }	 	 ||	j+                         z  }# t        j,                  $ r  t/               }|j1                  |       |cY S w xY w)Nr   c                  >    j                     t         } dz  | S r   )rA   DEFAULT_SUFFIX)r   
file_countstorage_plans    r>   gen_filez._FileSystemWriter.write_data.<locals>.gen_file  s,    '../
|N;KLI!OJr=   r   )targetr  )r   r   r   r   ro   r   r   )r   r   Queuer  r   r   r]   r  r   r   r   r   	threadingThreadr   r   r  r  startr`   joinr   r   r)   
set_result)rN   r'  r   r<  r   bucketr   r   r   r   threadsrj   tresfutr:  r;  s                  @@r>   
write_dataz_FileSystemWriter.write_data  s    
 (,'8'8
	 #(++-
$$1$2C2CTZZP :$J	ww**499i@i89:
 

 :$J	ww**499i@i$89:
 %*KKMq$++, 	A  .GG)) ..OO%%A GGINN1	  	 ''//!% $ : :oo**	
  	AFFH	 	|..00 {{ 	-3XCNN3J	s   5H
 
0H=<H=metadataresultsc                 J   i }|D ]6  }|j                  |D ci c]  }|j                  |j                   c}       8 ||_        | j                         |_        t	        t
        | j                  j                  | j                  t         d            }| j                  j                  |d      5 }t        j                  ||       | j                  r$	 t        j                  |j!                                d d d        | j                  j'                  | j(                        r%| j                  j+                  | j(                         | j                  j-                  || j(                         y c c}w # t"        $ r t        j$                          Y w xY w# 1 sw Y   xY w)Nz.tmpr   )updater   r   storage_metar
   r   r  r   r   r/   r   pickledumpr  r   r   r   r   r   r   r*  r   r   )rN   rJ  rK  
storage_mdwr_listwrtmp_pathmetadata_files           r>   finishz_FileSystemWriter.finish>  s?   
 	MG7KRrxx8KL	M * $ 1 1 3dgg11$))~T=RSTWW""8T2 	mKK-0HH]1134		 77>>$,,-GGOOD../x!3!34% L & GGI	 	s/   E1
1#F#E66FFFFF"c                 D    t        | j                  | j                        S )N)r   r  )r   r   r  rR   s    r>   rN  z_FileSystemWriter.storage_metaU  s    ););T\\RRr=   c                 r    t        t        | j                  j                  | j                  t
                    S rD   )r
   r   r  r   r   r/   rR   s    r>   r*  z_FileSystemWriter.metadata_pathX  s$    D$''--diiFGGr=   c                     | j                   S )zT
        return the checkpoint_id that will be used to save the checkpoint.
        r   rR   s    r>   r   z_FileSystemWriter.checkpoint_id\      
 yyr=   c                 ,    t         j                  |      S rD   r,   r   r   s     r>   r   z(_FileSystemWriter.validate_checkpoint_idc      00??r=   )TTr   逖 TrD   )!r5   r6   r7   r8   r   r9   r   r   r   r;   r   r^   r#  r&  r    r.  r   r6  r!   r)   r'   rI  r   rV  r   r   rN  r   r*  r   r  r   __classcell__r   s   @r>   r  r    s     &*%/ #C$% # # # 	 #
  #  # #  #  #  # 
 #D(5bkk4)?#@ (D (
D T x H h DN AA A 
[!	"	AF5x 5$tK7H2I 5d 5.Sh{3 S HuS"++%56 H H uS"++%56   @5bkk9I3J @t @ @r=   r  c                   n    e Zd Zdeeej                  f   ddf fdZdede	j                  fdZddeeej                  df   ddfdZd	ed
eded   fdZdefdZdededdfdZd	edefdZdee   dee   fdZedeeej                  f   fd       Zedeeej                  f   defd       Z xZS )r+   r   rB   Nc                     t         |           t               | _        | j                  j	                  |      | _        i | _        t               | _        y rD   )	r  r^   r,   r  r   r   r   rG   load_id)rN   r   r   s     r>   r^   zFileSystemReader.__init__i  s?    ,GG%%d+	?A%'r=   sinfoc                 D    t        ||j                  |j                        S rD   )r(   r3   r4   )rN   filere  s      r>   _slice_filezFileSystemReader._slice_filep  s     u||U\\BBr=   r   c                 t    i | _         |r | j                  j                  |      | _        t	               | _        y rD   )r   r  r   r   rG   rd  r"  s     r>   r#  zFileSystemReader.resets  s-    ))-8DI%'r=   r'  r   c                    i }|j                   D ]H  }| j                  |j                     }|j                  }|j	                  |g       j                  |       J |j                         D ]  \  }}| j                  j                  | j                  |      }	| j                  j                  |	d      5 }
|D ]  }| j                  |j                     }| j                  |
|      }|j                  t        j                  k(  rRt        j                  |j!                  |j"                              }|j%                  d       |j'                  ||       t)        t*        t-        j.                  t)        t0        t2           |      dd            }t5        ||j6                  |j8                        }|j;                  |      j=                         }|j?                         |j?                         k(  s6J d|j                   d|j?                          d|j?                                 |jA                  |       |jC                  ||        	 d d d         tE               }|jG                  d        |S # 1 sw Y   xY w)	Nrbr   re   T)map_locationweights_onlyzreq z mismatch sizes z vs )$r]   r   storage_indexr2   
setdefaultr`   r  r   r   r   rh  r   r   r   r   r   readr4   seek
load_bytesr
   r   rX   loadr   r   r   storage_offsetslengthsresolve_tensorrd   rJ   copy_commit_tensorr)   rC  )rN   r'  r   per_file	read_itemitem_mdr   r2   reqsr   rn   req
file_slice
read_bytesrk   target_tensorrH  s                    r>   	read_datazFileSystemReader.read_datay  s?   .0 	<I''	(?(?@G((Db)00;	<
 $,>>#3 	BM4ww**499mDH&&x6 B& BC"//0A0ABG!%!1!1&'!BJxx<#7#77%'ZZ
0O%P
"***3
;!%"!JJ $RY
 ;-2-1" "8"C$7$7" )0(>(>s(C(J(J(L *..0FKKMAo!#"3"3!44D]EWEWEYDZZ^_e_j_j_l^mnoA%++F3--c=A3BB B	B> ht
?B Bs   3FI55I?	c                 L   | j                   j                  | j                  d      }| j                   j                  |d      5 }t	        j
                  |      }d d d        t        dd       t               |_        | j                  |j                  _	        |S # 1 sw Y   BxY w)Nr.   rk  rN  )
r  r   r   r   rO  rs  r   r   rN  rd  )rN   r   rU  rJ  s       r>   read_metadatazFileSystemReader.read_metadata  s    ww""499k:WW""4. 	2-{{=1H	2 8^T2:$/MH!(,%	2 	2s   BB#rJ  r$  c                 B    |j                   | _         | j                   J y rD   r2  )rN   rJ  r$  s      r>   set_up_storage_readerz&FileSystemReader.set_up_storage_reader  s"    $11  ,,,r=   c                     |S rD   r<   r-  s     r>   r.  z#FileSystemReader.prepare_local_plan  s    r=   r/  c                     |S rD   r<   )rN   r/  s     r>   r6  z$FileSystemReader.prepare_global_plan  s    r=   c                     | j                   S )zT
        return the checkpoint_id that will be used to load the checkpoint.
        rZ  rR   s    r>   r   zFileSystemReader.checkpoint_id  r[  r=   c                 ,    t         j                  |      S rD   r]  r   s     r>   r   z'FileSystemReader.validate_checkpoint_id  r^  r=   rD   )r5   r6   r7   r   r9   r   r   r^   r1   r   r  rh  r#  r   r   r)   r  r   r  r   r  r.  r   r6  r   r   r  r   r`  ra  s   @r>   r+   r+   h  s2   (U3#34 ( (C| C		 C(5bkk4)?#@ (D ()h ) ) )X	x 	-h - -QU -x H h DN  uS"++%56   @5bkk9I3J @t @ @r=   r+   c                        e Zd ZdZ	 	 	 	 	 	 ddeeej                  f   dedede	de	deded	d
f fdZ
ded	ef fdZ xZS )r*   r  r   r  r  r   r  cache_staged_state_dictr  rB   Nc           	      2    t         |   |||||||       y)aM  
        Initialize the writer pointing to `path`.

        Args:
            path: directory where the checkpoint will be written to.
            single_file_per_rank: Produce one file per rank instead of one file per tensor/blob. Default to True.
            sync_files : force files to be synced to permanent storage. Default to True.
            thread_count: Number of IO threads to use to write. Default to 1.
            per_thread_copy_ahead: How many bytes to copy from the GPU ahead of saving then. Default 10Mb.
            cache_staged_state_dict: Whether to cache the staged state_dict. This option decreases staging latency
                at the cost of increases memory usage. Additionally, if this parameter is set to True, it's the expectation
                that the stager is maintained and re-used for multiple dcp.async_save calls. Default to False.
            overwrite: Whether to allow overwriting existing checkpoints. Defaults to True.

        N. B. If sync_files is disabled, there's no guarantee that the checkpoint will be consistent in the case of a failure.
        )r   r  r  r   r  r  r  N)r  r^   )	rN   r   r  r  r   r  r  r  r   s	           r>   r^   zFileSystemWriter.__init__  s,    4 	!5!%"7$; 	 	
r=   
state_dictc                 0    d| _         t        | 	  |      S )zOverride of AsyncStager.stager   )r  r  stage)rN   r  r   s     r>   r  zFileSystemWriter.stage  s     &'"w}Z((r=   )TTr   r_  FT)r5   r6   r7   r8   r   r9   r   r   r   r;   r^   r   r  r`  ra  s   @r>   r*   r*     s      &*%/(-"
C$%"
 #"
 	"

 "
  #"
 "&"
 "
 
"
H) )O ) )r=   r*   )Wrr   r3  r   r   r   rO  r   r?  rE   r+  abcr   r   
contextlibr   r   pathlibr   typingr   r	   r
   r   r   r   r   r   r   r   r   r   rX   r   torch._utilsr   r   torch.distributed._shard._utilsr   %torch.distributed.checkpoint.metadatar   r   r   r   $torch.distributed.checkpoint.plannerr   r   r   r   r    r!   r"   r#   $torch.distributed.checkpoint.stagingr$   $torch.distributed.checkpoint.storager%   r&   r'   "torch.distributed.checkpoint.utilsr(   torch.futuresr)   __all__r/   r9   r:   r1   r@   r9  rG   rI   rZ   rm   r;   r   r   r  r   r   r>  r   r   r-   r,   r  r+   r*   r<   r=   r>   <module>r     s3     	  	      # % !       G B 	 	 	 E 
 A   Sc         C } .W"M W"t4Y 43 4# d9o $tIBW 0II


ELL(
)  	
 0AAA ++A 	A
 A A A 
AH'S 'T," ,"^w@ w@t[@} [@|7)(*= 7)r=   