
    sg0                        U d Z ddlZddlZddlZddlmZmZmZmZm	Z	 ddl
Z
ddlZddlZej                  j                         Zexr ej                  j!                         dk\  Zer ej$                  d      ndZer
 ed       Zn	 ed       Z ed       Z ed	       Z ed
       Z ed       Z ed       Z ed       Z ed       Z ed       Zd Zd Z ed       Z ed       Z d Z!d Z"d Z# ed       Z$e%e&d<    ed       Z'e%e&d<    ed       Z(e%e&d<    ed       Z)e%e&d<   exr e Z*e%e&d<    ed        Z+e%e&d!<   d" Z, ed#       Z-e%e&d$<   er 	 ddl.Z/e/j                  ja                         Z1nd%Z1d%a4d& Z5d' Z6ejn                  d(        Z8ejn                  d4d)       Z9d4d*Z:d+ Z;d, Z<d- Z=d. Z>d/ Z?d0 Z@ e?       ZA e@       ZBd1ej                  j                  dfd2ZEd1ej                  ej                  j                  dfd3ZGesej                  j                         rJ yy# e2$ rZ3d%Z1d%ZY dZ3[3dZ3[3ww xY w)5z>This file is allowed to initialize CUDA context when imported.    N)LazyVal
TEST_NUMBATEST_WITH_ROCM	TEST_CUDA
IS_WINDOWS   zcuda:0c                      t         S N)r        V/var/www/html/venv/lib/python3.12/site-packages/torch/testing/_internal/common_cuda.py<lambda>r      s     r   c                      t         xrB t        j                  j                  j	                  t        j
                  dt                    S )N      ?device)r   torchbackendscudnnis_acceptabletensorCUDA_DEVICEr   r   r   r   r      s0    !wu~~/C/C/Q/QRWR^R^_ajuRv/w r   c                  b    t         r(t        j                  j                  j	                         S dS )Nr   )
TEST_CUDNNr   r   r   versionr   r   r   r   r      s     zU^^%9%9%A%A%C WX r   c                      t         j                  j                         xr! t         j                  j                         dk\  S )N)      r   cudais_availableget_device_capabilityr   r   r   r   r      +    ejj557hEJJ<\<\<^bh<h r   c                      t         j                  j                         xr! t         j                  j                         dk\  S )N)   r   r   r   r   r   r   r      r#   r   c                      t         j                  j                         xr! t         j                  j                         dk\  S )N)   r   r   r   r   r   r   r      r#   r   c                      t         j                  j                         xr! t         j                  j                         dk\  S )N)r'   r   r   r   r   r   r   r      r#   r   c                      t         j                  j                         xr! t         j                  j                         dk\  S )N)   r   r   r   r   r   r   r      r#   r   c                      t         j                  j                         xr! t         j                  j                         dk\  S )N)	   r   r   r   r   r   r   r       r#   r   c                      t         j                  j                         xr  t         j                  j                         dv S )N))r'   r   )r*   r'   r   r   r   r   r   r   "   s+    EJJ335p%**:Z:Z:\`p:p r   c                       t         r=t        j                  j                  d      j                   t         fddD              S y)Nr    c              3   &   K   | ]  }|v  
 y wr
   r   ).0archgcn_arch_names     r   	<genexpr>zCDNA2OrLater.<locals>.<genexpr>'   s     ^T4=(^s   >   gfx90agfx940gfx941gfx942F)r   r   r    get_device_propertiesgcnArchNameany)r2   s   @r   CDNA2OrLaterr;   $   s5    

88@LL^5]^^^r   c                     t         j                  j                         syt         j                  j                  d      j                  }t
        j                  j                  d|      }|| k(  S )NFr    /PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDE)r   r    r!   r8   r9   osenvironget)matching_archr2   r1   s      r   evaluate_gfx_arch_exactrB   *   sO    ::""$JJ44V<HHM::>>K][D=  r   c                      t        d      S )Ngfx90a:sramecc+:xnack-rB   r   r   r   r   r   1       67OP r   c                      t        d      S )Ngfx942:sramecc+:xnack-rE   r   r   r   r   r   2   rF   r   c                  j    t         rt        d      xs t        d      S t        rt         xr t        S y)NrD   rH   F)r   rB   r   r   SM80OrLaterr   r   r   *evaluate_platform_supports_flash_attentionrK   4   s0    &'?@uD[\tDuu~-+-r   c                  N    t         rt        d      xs t        d      S t        ryy)NrD   rH   TF)r   rB   r   r   r   r   .evaluate_platform_supports_efficient_attentionrM   ;   s&    &'?@uD[\tDuur   c                  6    t          xr t        xr	 t        dk\  S )Ni_ )r   rJ   TEST_CUDNN_VERSIONr   r   r   *evaluate_platform_supports_cudnn_attentionrP   B   s    QKQ5G55PQr   c                      t               S r
   )rK   r   r   r   r   r   E   
    :d:f r   !PLATFORM_SUPPORTS_FLASH_ATTENTIONc                      t               S r
   )rM   r   r   r   r   r   F   s
    <j<l r   #PLATFORM_SUPPORTS_MEM_EFF_ATTENTIONc                      t               S r
   )rP   r   r   r   r   r   G   rR   r   !PLATFORM_SUPPORTS_CUDNN_ATTENTIONc                  .    t         xs t        xs t        S r
   )rS   rW   rU   r   r   r   r   r   I   s    :[ ;V2S;V2U r   !PLATFORM_SUPPORTS_FUSED_ATTENTIONPLATFORM_SUPPORTS_FUSED_SDPAc                      t         xr t        S r
   )r   rJ   r   r   r   r   r   O   s    y/H[ r   PLATFORM_SUPPORTS_BF16c                     t         j                  j                         rnt         j                  j                  r+dt         j                  j                  d      j                  v S t        xs! t         j                  j                         dk(  S y)Ngfx94r   )r*   r,   F)	r   r    r!   r   hipr8   r9   SM90OrLaterr"   r   r   r   evaluate_platform_supports_fp8ra   Q   s_    zz ==ejj>>qAMMMMN%**"B"B"D"NNr   c                      t               S r
   )ra   r   r   r   r   r   Y   s
    .L.N r   PLATFORM_SUPPORTS_FP8Fc                      t         sJ d       t        sIt        t        j                  j                               D ]  } t        j                  dd|          day y )Nz?CUDA must be available when calling initialize_cuda_context_rng   zcuda:r   T)r   __cuda_ctx_rng_initializedranger   r    device_countrandn)is    r   initialize_cuda_context_rngrk   l   sS    WWW9%uzz..01 	/AKKE!+.	/%)"	 &r   c                  z   t         j                  j                         rt         j                  j                  yt         j                  j	                  t         j                  j                               j                  dk  ryt        t         j                  j                  j                  d      d         dk  ryy)NFr*   .r      T)	r   r    r!   r   r8   current_devicemajorintsplitr   r   r   tf32_is_not_fp32rs   y   s~    ::""$(:(:(Bzz''

(A(A(CDJJQN
5==##C(+,r1r   c               #     K   t         j                  j                  j                  j                  } 	 dt         j                  j                  j                  _        t         j                  j
                  j                  d d d d      5  d  d d d        | t         j                  j                  j                  _        y # 1 sw Y   3xY w# | t         j                  j                  j                  _        w xY ww)NFenabled	benchmarkdeterministic
allow_tf32)r   r   r    matmulry   r   flags)old_allow_tf32_matmuls    r   tf32_offr}      s     !NN//66AAF05""-^^!!''TXej'k 		 1F""-	 	 1F""-s5   /C9AC B?C *C9?CC +C66C9c              #   8  K   t         j                  j                  j                  j                  }| j
                  }	 dt         j                  j                  j                  _        || _        t         j                  j                  j                  d d d d      5  d  d d d        |t         j                  j                  j                  _        || _        y # 1 sw Y   :xY w# |t         j                  j                  j                  _        || _        w xY ww)NTru   )r   r   r    rz   ry   	precisionr   r{   )selftf32_precisionr|   old_precisions       r   tf32_onr      s     !NN//66AANNM'04""-'^^!!''TXei'j 		 1F""-&		 	 1F""-&s5   ;DAC% C C% (1DC"C% %2DDc                 (     d  fdfd}|S )Nc                 P    t               5   |        d d d        y # 1 sw Y   y xY wr
   r}   )r   function_calls     r   with_tf32_disabledz+tf32_on_and_off.<locals>.with_tf32_disabled   s!    Z 	O	 	 	s   %c                 V    t        |       5   |        d d d        y # 1 sw Y   y xY wr
   )r   )r   r   r   s     r   with_tf32_enabledz*tf32_on_and_off.<locals>.with_tf32_enabled   s'    T>* 	O	 	 	s   (c                      t        j                         j                  }t        |j	                               t        j                          fd       }|S )Nc                  X   t        |       D ]
  \  }}||<    t               }dv r)|xr% t        j                  d         j                  dk(  }dv r)|xr% d   t        j
                  t        j                  hv }|r! d   fd        d   fd       y  di  y )Nr   r    dtyper   c                        di S Nr   r   fkwargss   r   r   zCtf32_on_and_off.<locals>.wrapper.<locals>.wrapped.<locals>.<lambda>   s    1;v; r   c                        di S r   r   r   s   r   r   zCtf32_on_and_off.<locals>.wrapper.<locals>.wrapped.<locals>.<lambda>   s    !+f+ r   r   )ziprs   r   r   typefloat32	complex64)	argsr   kvcond	arg_namesr   r   r   s	    `   r   wrappedz1tf32_on_and_off.<locals>.wrapper.<locals>.wrapped   s    It, 1q	#%D6!OfX.>!?!D!D!N& UU]]EOO4T!T"6&>3FG!&.2EFFr   )inspect	signature
parameterstuplekeys	functoolswraps)r   paramsr   r   r   r   s   `  @r   wrapperz tf32_on_and_off.<locals>.wrapper   sK    ""1%00&++-(				 
	 r   r   )r   r   r   r   s   ` @@r   tf32_on_and_offr      s    ( Nr   c                 B     t        j                          fd       }|S )Nc                  T    t               5   | i |cd d d        S # 1 sw Y   y xY wr
   r   )r   r   r   s     r   r   zwith_tf32_off.<locals>.wrapped   s*    Z 	&d%f%	& 	& 	&s   ')r   r   )r   r   s   ` r   with_tf32_offr      s%    __Q& & Nr   c                  ^   dt         j                  j                         vryt         j                  j                         j                  d      } t         j                  j                         | t	        d      z   d  j                  d      d   }t        d |j                  d      D              S )NMagmar   r   zMagma 
r   c              3   2   K   | ]  }t        |        y wr
   rq   r0   xs     r   r3   z%_get_magma_version.<locals>.<genexpr>   s     8AQ8   rm   )r   
__config__showfindlenrr   r   )positionversion_strs     r   _get_magma_versionr      s    e&&++--$$&++H5H""'')(S]*B*CDJJ4PQRSK8!2!23!7888r   c                      t         j                  j                  yt        t         j                  j                        } t	        d | j                  d      D              S )Nr   c              3   2   K   | ]  }t        |        y wr
   r   r   s     r   r3   z*_get_torch_cuda_version.<locals>.<genexpr>        9AQ9r   rm   )r   r   r    strr   rr   )cuda_versions    r   _get_torch_cuda_versionr      sE    }}!u}}))*L9!3!3C!8999r   c                      t         syt        t        j                  j                        } | j                  d      d   } t        d | j                  d      D              S )Nr   -r   c              3   2   K   | ]  }t        |        y wr
   r   r   s     r   r3   z*_get_torch_rocm_version.<locals>.<genexpr>   r   r   rm   r   r   r   r   r_   rr   r   )rocm_versions    r   _get_torch_rocm_versionr      sM    u}}(()L%%c*1-L9!3!3C!8999r   c                      t          S r
   )r   r   r   r   !_check_cusparse_generic_availabler      s    r   c                      t         syt        t        j                  j                        } | j                  d      d   } t        d | j                  d      D              }|d u xs |dk   S )NFr   r   c              3   2   K   | ]  }t        |        y wr
   r   r   s     r   r3   z5_check_hipsparse_generic_available.<locals>.<genexpr>  s     G!s1vGr   rm   )r   re   r   )r   rocm_version_tuples     r   "_check_hipsparse_generic_availabler      sf    u}}(()L%%c*1-LG|/A/A#/FGG"d*I.@6.IJJr   r    c                    t         j                  j                  t         j                  j                  dd      t         j                  j                  dd            j	                  |       }t         j                  j                  t         j                  j                  dd      t         j                  j                  dd            j	                  |       }t        j
                         5  t        |j                         |j                               D ]  \  }}|j                  |        	 d d d        ddi}||j                  |        ||j                         fi |} ||j                         fi |}	||||	fS # 1 sw Y   TxY w)Nr*   r   lrr   )
r   nn
SequentialLineartono_gradr   r   copy_update)
r   optimizer_ctoroptimizer_kwargsmod_controlmod_scalingcsr   opt_controlopt_scalings
             r   !_create_scaling_models_optimizersr   
  s>    ((%%ehhooa&;UXX__QPQ=RSVV^dVeK((%%ehhooa&;UXX__QPQ=RSVV^dVeK	 ..0+2H2H2JK 	DAqGGAJ	 C[F#&' !7!7!9DVDK !7!7!9DVDK[+== s   -AFFc           	         t        j                  d||       t        j                  d||       ft        j                  d||       t        j                  d||       ft        j                  d||       t        j                  d||       ft        j                  d||       t        j                  d||       fg}t         j                  j                         j	                  |       }d}t        | ||      |||fz   S )N)r*   r*   )r   r   r   )r   r   r   )r   ri   r   MSELossr   r   )r   r   r   r   dataloss_fn	skip_iters          r   _create_scaling_caser     s    [[uV<ekk&X]fl>mn[[uV<ekk&X]fl>mn[[uV<ekk&X]fl>mn[[uV<ekk&X]fl>mnpD
 hh ##F+GI,nGW	w	"# #r   )gh㈵>)H__doc__r   r   
torch.cuda$torch.testing._internal.common_utilsr   r   r   r   r   r   
contextlibr>   r    is_initialized"CUDA_ALREADY_INITIALIZED_ON_IMPORTrh   TEST_MULTIGPUr   r   r   rO   SM53OrLaterSM60OrLaterSM70OrLaterSM75OrLaterrJ   r`   	IS_JETSONr;   rB   GFX90A_ExactGFX942_ExactrK   rM   rP   rS   bool__annotations__rU   rW   rY   rZ   r\   ra   rc   
numba.cudanumbar!   TEST_NUMBA_CUDA	Exceptionerf   rk   rs   contextmanagerr}   r   r   r   r   r   r   r   r   TEST_CUSPARSE_GENERICTEST_HIPSPARSE_GENERICoptimSGDr   floatr   r   r   r   <module>r      s   F    k k   	 &+ZZ%>%>%@ " <ejj5571<(1lell8$t*+JwxJXY hihihihihihipq	! PQPQR +22f*g !4 g,34l,m #T m*12f*g !4 g*1 3V +W !4 W &/%E~3E d E&'HI  I &&NO t O **113
 O # * F F 
' 
'RJ9::K :; ;=  .4EKKOOfj >$ !'ekk%++//lp #  *zz((**** *Y  
s   H2 2I7I  I