
    sgL+                         d dl mZmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ  e	d       G d	 d
             Z G d de      Zy)    )BaseBackend	GPUTarget)irpassesllvmamd)	dataclass)AnyTupleN)PathT)frozenc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	e
ed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZee   ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zd Zy)
HIPOptions   	num_warps   waves_per_eur   
num_stagesnum_ctasNextern_libs)r   r   r   cluster_dimsFdebugarchallow_fp8e4nvallow_fp8e4b15ieeedefault_dot_input_precision)r   allowed_dot_input_precisionsTenable_fp_fusionmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namec                    t        t              j                  dz  }| j                  i nt	        | j                        }d| j
                  v sd| j
                  v rdnd}t        j                  | d|       ddg}|D ]  }t        || d	z        ||<    t        j                  | d
t        |j                                      | j                  dkD  r| j                  | j                  dz
  z  dk(  sJ d       y )Nlibgfx10gfx11    @   	warp_sizeocmlocklz.bcr   r   r   znum_warps must be a power of 2)r   __file__parentr   dictr   object__setattr__strtupleitemsr   )selfdefault_libdirr   r,   libsr'   s         O/var/www/html/venv/lib/python3.12/site-packages/triton/backends/amd/compiler.py__post_init__zHIPOptions.__post_init__#   s    h..6 ,,4b$t?O?O:P!TYY.'TYY2FBB	4i8 	AC">se3K#?@K	A4k6G6G6I0JK~~!t~~!9K'LQR&R 	0/	0R&R    c           	          dj                  | j                  j                         D cg c]  \  }}| d|  c}}      }t        j                  |j                  d            j                         S c c}}w )N_-utf-8)join__dict__r6   hashlibsha256encode	hexdigest)r7   namevalkeys       r:   hashzHIPOptions.hash0   s]    hh9L9L9NOID#4&#OP~~cjj12<<>> Ps   A4
)__name__
__module____qualname__r   int__annotations__r   r   r   r   r1   r   r5   r   boolr   r4   r   r   r   r   r   r   r    r!   r"   r#   r%   r;   rJ    r<   r:   r   r      s    IsL#JHcK#L%#E4D#M4 ND '--/9 %*9!d! !#!E3N$$)*!3*L#0?r<   r   c                        e Zd Zedefd       Zdeddf fdZdefdZd Z	d Z
d	 Zed
        Zed        Zed        Zed        Zed        Zed        Zd Z ej(                         d        Z xZS )
HIPBackendtargetc                      | j                   dk(  S )Nr$   )backend)rT   s    r:   supports_targetzHIPBackend.supports_target7   s    ~~&&r<   returnNc                 j    t         |   |       t        |j                  t              sJ d| _        y )Nhsaco)super__init__
isinstancer   r4   
binary_ext)r7   rT   	__class__s     r:   r\   zHIPBackend.__init__;   s+     &++s+++!r<   c                     d| j                   j                  i}|j                  t        j                  j                         D ci c]  }||v s|||    c}       t        di |S c c}w )Nr   rQ   )rT   r   updater   __dataclass_fields__keys)r7   optsargsks       r:   parse_optionszHIPBackend.parse_options@   sa    (())H)H)M)M)O]ASTX\S\QQZ]^!D!! ^s   	A(A(c                     |j                   |j                  |j                  |j                  d   |j                  d   |j                  d   fS )Nr   r      )r   r   sharedr   )r7   metadatas     r:   pack_metadatazHIPBackend.pack_metadataE   sO    OO!!!$!!!$!!!$
 	
r<   c                     t               }|S N)r1   )r7   codegen_fnss     r:   get_codegen_implementationz%HIPBackend.get_codegen_implementationO   s    fr<   c                 .    t        j                  |       y rn   )r   load_dialects)r7   ctxs     r:   rr   zHIPBackend.load_dialectsS   s    #r<   c                  P   t        j                  d      } | t        |       }|j                         r|S t        t              j
                  dz  }|j                         r|S t        d      }|j                         r|S t        d      }|j                         r|S t        d      )NTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldz/ROCm linker /opt/rocm/llvm/bin/ld.lld not found)osgetenvr   is_filer/   r0   	Exception)lld_env_pathllds     r:   path_to_rocm_lldzHIPBackend.path_to_rocm_lldV   s     yy!67#|$C{{}
8n##&77;;=J./;;=J$%;;=JIJJr<   c                 v   t        j                  | j                        }|j                          t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j
                  j                  |       t        j
                  j                  |       |j                  |        | S rn   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointeradd_combineadd_canonicalizeradd_reorder_broadcastadd_cseadd_licmadd_symbol_dcerunmodrk   optionspms       r:   	make_ttirzHIPBackend.make_ttirj   s    __S[[)
!!"%..r2#''+))"-b!r"$$R(
s
r<   c                 T   t        j                  | j                        }|j                          t        j
                  j                  |d|j                   |j                  |j                  |j                         |j                  |        t        j                  | j                        }|j                          t        j                  j                  |       t        j                  j                  |       t        j                  j                  |       t         j                  j                  j#                  ||j                  |j$                  |j&                         t        j                  j                  |       t         j                  j                  j)                  |       t        j                  j+                  |d       |j,                  dk(  rgt!        j.                  |j                        rHt         j                  j                  j1                  |       t        j2                  j5                  |       t        j                  j+                  |d       t        j                  j                  |       t        j                  j7                  |       |j,                  dk7  r)t         j                  j                  j9                  |       t        j2                  j;                  |       t        j2                  j=                  |       |j                  |        | S )Nzhip:Tr   )r   r~   r   r   r   r   add_convert_to_ttgpuirr   r   r,   r   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr   add_accelerate_matmulr    r!   add_optimize_epilogueadd_optimize_dot_operandsr   has_matrix_core_featureadd_stream_pipeliner   r   add_reduce_data_duplicationadd_reorder_instructionsr   r   r   s       r:   
make_ttgirzHIPBackend.make_ttgiry   s   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s__S[[)
##B'44R833B7

00W\\7C_C_ahanano44R8

00400T:"s'B'B7<<'PJJ2226MM++B/00T:44R82226"JJ77;b!$$R(
s
r<   c                 r	   | }t        j                  |j                        }|j                          t        j
                  j                  j                  ||j                         t
        j                  j                  |       t
        j                  j                  |       t
        j                  j                  |       d}t        j
                  j                  j                  ||j                  |       t
        j                  j                  |       t
        j                  j!                  |       t
        j                  j#                  |       t
        j                  j%                  |       t
        j                  j                  |       t
        j                  j!                  |       t
        j                  j'                  |       t(        j*                  j-                  dd      dk(  rt
        j.                  j1                  |       t        j
                  j                  j3                  |       |j5                  |       t7        j8                          t7        j                         }t7        j:                  ||      }t	        j<                  ||j                         t	        j>                  |d       t	        j@                  |dd       t	        j@                  |dd       t	        j@                  |dd       t	        j@                  |d	|jB                  d
k(         |jE                         D cg c]  }|jG                         r| }	}|	d   jI                  t        jJ                         |	d   jM                  dd|jN                  |jB                  z          |	d   jM                  d|jP                          |jR                  rdnd}
|	d   jM                  d|
       |jT                  rK|jT                  D cg c]  \  }}t	        jV                  ||      s|  }}}t7        jX                  ||       t7        jZ                  |t6        j\                  t        j^                         | ja                  d      |d<   t	        jb                  |       te        |      S c c}w c c}}w )NTTRITON_DISABLE_LINE_INFO0i  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64r+   r   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr   zdenormal-fp-math-f32ztriton_gpu.sharedrj   )3r   r~   r   r   r   r   r   %add_decompose_unsupported_conversionsr   convertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rv   environgetllvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleset_isa_versionset_abi_versionset_bool_control_constantr,   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr   r   r"   r   need_extern_liblink_extern_libsoptimize_moduleOPTIMIZE_O3TARGET_TRIPLEget_int_attrcleanup_bitcode_metadatar4   )srcrk   r   r   r   _HIPBackend__HIP_FTZr   llvm_modfnfnsdenormal_moderG   pathpathss                 r:   	make_llirzHIPBackend.make_llir   sf   __S[[)


@@W\\R$$R(**2.11"5 	

((W\\9E''+b!''+**2.''+b!$$R(::>>4c:cAMM&&r* 	

55b9
s 	,,.>>#w/ 	Hgll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224PbB<M<M<OrPPA > >?A8Bw?P?PQXQbQb?b>c:deA0W5I5I4JL+2+E+E6A1=A.5.A.AiltTSEXEXYacgEhTiEi!!(E2Xt'7'79J9JK !--.AB$$X.8}% Q js   >R.R.R3*R3c           	      N   t        j                  d|       }t        |      dk(  sJ |d   |d<   t        j                  | t
        j                  |j                  dg |j                  d      }t        j                  j                  dd      d	k(  rt        d
       t        |       |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rG    FAMDGCN_ENABLE_DUMPr   1z!// -----// AMDGCN Dump //----- //)refindalllenr   translate_to_asmr   r   r   r   rv   r   r   print)r   rk   r   namesamdgcns        r:   make_amdgcnzHIPBackend.make_amdgcn   s    
 

QSVW5zQ 8&&sC,=,=w||RQSU\UmUmotu::>>.4;56&Mr<   c                 d   t        j                  | |j                  d      }t        j	                         }t        j                         5 }t        j                         5 }t        |j                  d      5 }|j                  |       d d d        t        j                  |ddd|j                  d|j                  g       d d d        t        |j                  d      5 }|j                         }	d d d        d d d        	S # 1 sw Y   zxY w# 1 sw Y   NxY w# 1 sw Y   +xY w# 1 sw Y   	S xY w)Nr   wbz-flavorgnuz-sharedz-orb)r   assemble_amdgcnr   rS   r|   tempfileNamedTemporaryFileopenrG   write
subprocess
check_callread)
r   rk   r   rZ   	rocm_pathtmp_outtmp_infd_infd_outrets
             r:   
make_hsacozHIPBackend.make_hsaco   s   ##Cr://1	((* 	$g,,. q&&++t, 'KK&'%%y)UIv{{\`bibnbn&opq gllD) $Vkkm$	$ 
' 'q q$ $	$ 
sT   
D%D6D8D D%D/D%D
DD	D%D"	D%%D/c                 b      fd|d<    fd|d<    fd|d<    fd|d<    fd	|d
<   y )Nc                 *    j                  | |      S rn   )r   r   rk   r   r7   s     r:   <lambda>z'HIPBackend.add_stages.<locals>.<lambda>       t~~c8W/U r<   r   c                 *    j                  | |      S rn   )r   r   s     r:   r   z'HIPBackend.add_stages.<locals>.<lambda>       Xw0W r<   ttgirc                 *    j                  | |      S rn   )r   r   s     r:   r   z'HIPBackend.add_stages.<locals>.<lambda>   r   r<   llirc                 *    j                  | |      S rn   )r   r   s     r:   r   z'HIPBackend.add_stages.<locals>.<lambda>   s    1A1A#xQX1Y r<   r   c                 *    j                  | |      S rn   )r   r   s     r:   r   z'HIPBackend.add_stages.<locals>.<lambda>  r   r<   rZ   rQ   )r7   stagesr   s   ` `r:   
add_stageszHIPBackend.add_stages   s1    UvWwUvYxWwr<   c                 z    t        j                  t        j                         dgd      }| d| j                   S )Nz	--versionr@   )encodingr?   )r   check_outputrS   r|   rT   )r7   versions     r:   rJ   zHIPBackend.hash  s8    )):+F+F+H+*Vahi!DKK=))r<   )rK   rL   rM   staticmethodr   rW   r\   r
   rg   rl   rp   rr   r|   r   r   r   r   r   r   	functools	lru_cacherJ   __classcell__)r_   s   @r:   rS   rS   5   s    '	 ' '"y "T "
"S "

 K K&    8 F FP    X Y* *r<   rS   )triton.backends.compilerr   r   triton._C.libtritonr   r   r   r   dataclassesr	   typingr
   r   rC   r   rv   r   r   r   pathlibr   r   rS   rQ   r<   r:   <module>r     sV    ; 5 5 !    	 	    $#? #? #?LQ* Q*r<   