
    sgp                        d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z
 d dlm	c mZ d dlmZ d dlmZmZ  e
j"                         s& edej&                          ej(                  d        edz   Z	 	 	 	 	 dd	ed
efdZddZddededefdZddeddfdZdeeef   fdZ	 ddededededdf
dZdededdfdZdedefdZ d Z!y)    N)partialwraps)Tuple_rref_context_get_debug_info)FILE_SCHEMATEST_WITH_TSANz"c10d not available, skipping tests)filez{file_name}	setup_rpcclean_shutdownc                 j      t        t              S t                fd       }|S )ak  
    We use this decorator for setting up and tearing down state since
    MultiProcessTestCase runs each `test*` method in a separate process and
    each process just runs the `test*` method without actually calling
    'setUp' and 'tearDown' methods of unittest.

    Note: pass the string representation of MessageTypes that should be used
    with the faulty agent's send function. By default, all retriable messages
    ("RREF_FORK_REQUEST", "RREF_CHILD_ACCEPT", "RREF_USER_DELETE",
    "CLEANUP_AUTOGRAD_CONTEXT_REQ") will use the faulty send (this default is
    set from faulty_rpc_agent_test_fixture.py).
    )r   r   faulty_messagesmessages_to_delayc                    dd l mc mc m} d|_        | j
                  | _        | j                         | j                  }
rt        r7t        j                  j                  dz  |_        dt        j                  _        t        j                  d| j
                  z  | j                  | j
                  | j                   |        	| g|i |}
rt        j"                         |S )Nr   F   <   zworker%d)namebackendrank
world_sizerpc_backend_options)graceful)torch.distributed.rpc.apidistributedrpcapi_ignore_rref_leakr   	worker_idsetup_fault_injectionr   r	   	constantsDEFAULT_RPC_TIMEOUT_SECrpc_timeoutDEFAULT_SHUTDOWN_TIMEOUTinit_rpcrpc_backendr   shutdown)selfargkwargsr   r   return_valuer   r   r   old_test_methodr   s         U/var/www/html/venv/lib/python3.12/site-packages/torch/testing/_internal/dist_utils.pynew_test_methodz"dist_init.<locals>.new_test_method8   s     	0/ %""?4EF"6625--2W2WZ[2[#/9;6LL$))+((YY??$7 't<c<V<LL.1    )r   	dist_initr   )r+   r   r   r   r   r-   s   ````` r,   r/   r/      sH    2 )+/
 	
 ? > r.   returnc                       y )N r2   r.   r,   noopr3   [   s    r.   r   expected_error_regexc                     	 	 t        j                  d|  t        d       t        j                  d       6# t
        $ r:}t        j                  |t        |            rt        |      cY d}~S Y d}~?d}~ww xY w)aC  
    Loops until an RPC to the given rank fails. This is used to
    indicate that the node has failed in unit tests.
    Args:
    rank (int): Rank of the node expected to fail
    expected_error_regex (optional, str): Regex of exception message expected. Useful to ensure a specific failure
    occurs, not just any.
    workerr2   )args皙?)patternstringN)	r   rpc_syncr3   timesleep	Exceptionresearchstr)r   r4   es      r,   wait_until_node_failurerC   _   sf     	LL6$$R8JJsO   	yy!5c!fE1v F	s   48 	A;*A6+A;6A;timeoutc                    t        j                          }	 t               }t        |d         }t        |d         }|dk(  r|dk(  ryt        j                  d       t        j                          |z
  | kD  rt	        d| d| d      s)	a2  
    The RRef protocol holds forkIds of rrefs in a map until those forks are
    confirmed by the owner. The message confirming the fork may arrive after
    our tests check whether this map is empty, which leads to failures and
    flaky tests. to_here also does not guarantee that we have finished
    processind the owner's confirmation message for the RRef. This function
    loops until the map is empty, which means the messages have been received
    as processed. Call this function before asserting the map returned by
    _get_debug_info is empty.
    num_pending_futuresnum_pending_usersr   r8   z:Timed out waiting to flush pending futures and users, had z pending futures and z pending usersN)r<   r   intr=   
ValueError)rD   start
debug_inforF   rG   s        r,   ,wait_until_pending_futures_and_users_flushedrL   q   s     IIKE
13
!*-B"CD
+> ?@!#(9Q(>

399;(*++@AR@SSac  r.   c                  2    t               } | d   }| d   }||fS )zf
    Retrieves number of OwnerRRefs and forks on this node from
    _rref_context_get_debug_info.
    num_owner_rrefs	num_forksr   )rref_dbg_info
num_ownersrO   s      r,   get_num_owners_and_forksrR      s,    
 12M01Jk*Iy  r.   rQ   rO   c                 V   t        j                          }	 t        j                  t        |      t        dd      \  }}t        |      }t        |      }|| k(  r||k(  ryt        j                  d       t        j                          |z
  |kD  rt        d| d|  d| d	| d| d
      )z
    Waits until timeout for num_forks and num_owners to exist on the rank. Used
    to ensure proper deletion of RRefs in tests.
    r2   r   )r7   rD   N   zTimed out waiting z	 sec for z owners and z forks on rank, had z forks)r<   r   r;   worker_namerR   rH   r=   rI   )rQ   rO   r   rD   rJ   num_owners_on_ranknum_forks_on_ranks          r,   #wait_until_owners_and_forks_on_rankrX      s     IIKE
037b!1
-- !!34 12+0AY0N

199;($WIYzl,yk Z*+<8I7J&R  r.   r   c                 `    t        j                         st        j                  d| ||       y y )Ngloo)r   init_methodr   r   )distis_initializedinit_process_group)r[   r   r   s      r,   initialize_pgr_      s-     #!		
 !r.   c                     d|  S )Nr6   r2   )r   s    r,   rU   rU      s    D6?r.   c                 T    | D cg c]  }||j                   v s| c}d   }|S c c}w )aQ  
    Returns the first event that matches partial_event_name in the provided
    function_events. These function_events should be the output of
    torch.autograd.profiler.function_events().

    Args:
    function_events: function_events returned by the profiler.
    event_name (str): partial key that the event was profiled with.
    r   )r   )function_eventspartial_event_nameevents      r,   get_function_eventre      s1     !0Tu3E3SUTUVWEL Us   %%)NTTNN)r0   N)z.*)   )"r?   sysr<   	functoolsr   r   typingr   torch.distributedr   r\   torch.distributed.rpcr   r   $torch.testing._internal.common_utilsr   r	   is_availableprintstderrexitINIT_METHOD_TEMPLATEboolr/   r3   rH   rA   rC   rL   rR   rX   r_   rU   re   r2   r.   r,   <module>rs      s6   
 
  $    # # > L t	
.SZZ@CHHQK #]2  BB BJ	# S C $# t 4!%S/ ! @B #+.9<	0
S 
c 
d 
c c r.   