
    sgU                     j   d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	e	j                  j                  ZdZdZdZdZd	Zg d
Zedefd       ZdededefdZ	 ddedededededee   fdZdedededeegef   dedeee      fdZdededefdZ	 	 	 	 ddedededee   deeegef      deddfdZy)    )contextmanager)	timedelta)CallableIterableListOptionalNz/num_membersz/last_memberz/TRACEz/TRACING_GATE   )store_timeoutget_allsynchronizebarriertimeoutc              #      K   | j                   }| j                  t        |             d | j                  |       yw)z
    This sets the timeout and then restores the old timeout when the context
    manager exits.

    Args:
        store: the store to set the timeout on
        timeout: the timeout to set
    secondsN)r   set_timeoutr   )storer   old_timeouts      X/var/www/html/venv/lib/python3.12/site-packages/torch/distributed/elastic/utils/store.pyr
   r
      s5      --K	i01		k"s   >A rankprefix
world_sizec                     | j                  t        |      D cg c]  }| | 
 c}      }t        | || d      }|dk(  r| j                  |g       |S c c}w )aa  
    Given a store and a prefix, the method goes through the array of keys
    of the following format: ``{prefix}{idx}``, where idx is in a range
    from 0 to size, and tries to retrieve the data.

    The Rank0 process waits at the end to make sure all other processes
    finished the procedure before exiting.

    Usage

    ::

     values = get_all(store, 'torchelastic/data', 3)
     value1 = values[0] # retrieves the data for key torchelastic/data0
     value2 = values[1] # retrieves the data for key torchelastic/data1
     value3 = values[2] # retrieves the data for key torchelastic/data2

    z	/finishedr   r   
key_prefixr   )	multi_getrange_barrier_nonblockingwait)r   r   r   r   idxdata_arrbarrier_keys          r   r   r   .   si    & E*<MNS6(3% 0NOH&XY'K
 qy 	

K=!O  Os   Adatar   returnc                     t        | |      5  | j                  | | |       t        | |||      }|cddd       S # 1 sw Y   yxY w)aT  
    Synchronizes ``world_size`` agents between each other using the underlying c10d store.
    The ``data`` will be available on each of the agents.

    Note: The data on the path is not deleted, as a result there can be stale data if
        you use the same key_prefix twice.

    Time complexity: O(N) per worker, O(N^2) globally.
    N)r
   setr   )r   r#   r   r   r   r   
agent_datas          r   r   r   Q   sM    " 
ug	& 		ZL'.UD*jA
  s	   &=Arank_decodertrace_timeoutc                       j                   | t         d        fd} fd}|dk(  r# |       } j                   t         d       |S  |       S )N<val_ignored>c                  Z   t               } d}t        d      D ]c  }|t        k\  r | S 	 |dk(  r(j                   | t         gt                     n'j                   | t         gt        d             e | S # t        $ r |dz  }| j                   |             Y w xY w)Nr      r   )milliseconds)r&   r   _MAX_TRACE_MISSING_RANKSr   _TRACEr   DistStoreErroradd)missing_rank_inforanks_missingir   r(   r   r)   r   s      r   _find_missing_ranksz9_try_detecting_missing_ranks.<locals>._find_missing_ranksr   s    Eq*% 	7A  88 ! 
7 A%JJ&<s6(34i6V
 JJ:,qc& :;YTU=VW	7  !  " 7"!%%l1o67s   AB%B*)B*c                  r    	 j                    t         g       d d       dgS # t        $ r Y y w xY w)Nz[<check rank 0 (r   z) for missing rank info>])r   _TRACING_GATEr1   )r   r(   r   s   r   _checkinz._try_detecting_missing_ranks.<locals>._checkin   sJ    	JJ:,}o678&|A&77PQRR 		s   &* 	66r   )r&   r0   r8   )	r   r   r   r   r(   r)   r6   r9   r3   s	   ``` ``   r   _try_detecting_missing_ranksr:   h   sf     
IITF6(+_=! !* qy/1		ZL0/B  z    c                 |    |t         z   }|t        z   }| j                  |d      }||k(  r| j                  |d       |S )zq
    Does all the non-blocking operations for a barrier and returns the final key
    that can be waited on.
    r-   r+   )_NUM_MEMBERS_LAST_MEMBER_CHECKINr2   r&   )r   r   r   num_members_keylast_member_keyr    s         r   r   r      sE    
 !</O #77O
))OQ
'C
j		/?3r;   barrier_timeoutrank_tracing_decoderc                 `   |	|J d       t        | |      5  t        | ||      }	 | j                  |g       	 ddd       y# t        $ rT}||t	        | ||||xs d |      }	|	2t        dj                  |||ddj                  |	       d|            d|d}~ww xY w# 1 sw Y   yxY w)	as  
    A global lock between agents. This will pause all workers until at least
    ``world_size`` workers respond.

    This uses a fast incrementing index to assign waiting ranks and a success
    flag set by the last worker.

    Time complexity: O(1) per worker, O(N) globally.

    Optionally, passing rank will enable tracing of missing ranks on timeouts.
    `rank_tracing_decoder` lambda arg can be used to convert rank data
    into a more meaninful information at an app level (e.g. hostname).

    Note: Since the data is not removed from the store, the barrier can be used
        once per unique ``key_prefix``.
    Nz!Tracing requires rank informationr   c                     t        |       S )N)str)xs    r   <lambda>zbarrier.<locals>.<lambda>   s
    s1v r;   ziTimed out waiting on barrier on rank {}, for key prefix: {} (world_size={}, missing_ranks={}, timeout={})[z, ])r
   r   r   r1   r:   formatjoin)
r   r   r   rA   r   rB   r)   r@   emissing_rankss
             r   r   r      s    4 |#+P-PP+	uo	. .J:
	JJ()   	| <(>-=!! !,(ddjdj &&		- 89;+e	  	  G1	 s)   B$A	B!ABB!!B$$B-),  )rN   NN
   )
contextlibr   datetimer   typingr   r   r   r   torch_C_DistStoreErrorr1   r=   r>   r0   r8   r/   __all__floatr
   intrE   r   bytesr   r:   r   r    r;   r   <module>r[      s   &  5 5  ))% 	  A #% # #    c  s  R 
  	
   
%[.,, , 	,
 C5#:&, , hsm,^C S S & !;?;; ; 	;
 3-; #8SE3J#78; ; 
;r;   