
    sgF                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ ej                  j!                  ej                  j#                  e            Zej                  j'                  ed      gZd Z e j,                         d	        Zd
 Z G d de      Zd Zd Z G d de      Z G d de      Zy)    N)Path)_build)get_cache_manager)	GPUTarget)	GPUDriverincludec                     dd l }|j                         dk7  ry dd lddlm}m}m}mmm}  G fddj                        }j                  | ||       ||       ||            }	 j                  d      j                  }|g|_        ||_        dj                  dz         }	 fd	}
 | ||
      |	      r$t!        j"                  j%                  |	            S y #  Y y xY w)
Nr   Linux)c_charc_intc_size_tc_void_pc_char_pPOINTERc                   "    e Zd ZdW fdW  fgZy)8_find_already_mmapped_dylib_on_linux.<locals>.DlPhdrInfo	dlpi_addr	dlpi_nameN)__name__
__module____qualname___fields_)r   r   s   M/var/www/html/venv/lib/python3.12/site-packages/triton/backends/amd/driver.py
DlPhdrInfor      s    (#(#
    r   z	libc.so.6i      c           
          | j                   j                  }t        t        j                  |            }|j
                  v r'j                  ||t        t        |                   yy)Nr   r   )	contentsr   r   osfsdecodenamememmoveminlen)infosizedatar   pctypeslib_namemax_path_lengths        r   callbackz6_find_already_mmapped_dylib_on_linux.<locals>.callback2   sR    MM++	Y'(qvvNN4CY,PQr   )platformsystemr)   r   r   r   r   r   r   	Structure	CFUNCTYPECDLLdl_iterate_phdrargtypesrestypecreate_string_bufferr   r    	string_at)r*   r-   r   r   r   r   r   
callback_tr2   pathr,   r   r   r)   r+   s   `          @@@@r   $_find_already_mmapped_dylib_on_linuxr9      s    G#
 KK
V%% 
 !!%)<gh>OQXY_Q`aJ ++k2BB !+H5O#OO&&':;D z(+T2{{6++D122+s   1C- -C1c                     d} t        j                  d      }|rC|j                  |       r!t         j                  j	                  |      r|S t        d| d|        t        |       }|r2t         j                  j	                  |      r|S t        d| d|        g }dd l}|j                         }|j                         }|j                  r|g|z   }|D ]X  }t         j                  j                  |dd	|       }t         j                  j	                  |      r|c S |j                  |       Z t        j                  d
      }|rj|j                  d      D ]V  }	t         j                  j                  |	|       }
t         j                  j	                  |
      r|
c S |j                  |
       X t        j                  ddg      j!                         }|j#                         D cg c]5  }|j%                         j                  |       s#|j                         d   7 }}|D ]6  }t         j                  j	                  |      r|c S |j                  |       8 t         j                  j                  d|       }t         j                  j	                  |      r|S |j                  |       t        d|  d|       c c}w )Nzlibamdhip64.soTRITON_LIBHIP_PATHzTRITON_LIBHIP_PATH 'z' does not point to a valid zmemory mapped 'z'' in process does not point to a valid r   torchlibLD_LIBRARY_PATH:z/sbin/ldconfigz-pz/opt/rocm/lib/zcannot locate z after attempted paths )r   getenvendswithr8   existsRuntimeErrorr9   sitegetsitepackagesgetusersitepackagesENABLE_USER_SITEjoinappendsplit
subprocesscheck_outputdecode
splitlinesstrip)r*   env_libhip_pathmmapped_pathpathsrE   site_packages	user_siter8   env_ld_library_pathdflibslinelocsloccommon_install_paths                   r   _get_path_to_hip_runtime_dylibr^   @   sh   H ii 45O##H-"''..2Q""1/1BB^_g^hijj 8AL77>>,'_\N:abjaklmmE ((*M((*I"m3 ww||D'5(;77>>$KT	 ))$56$**3/ 	AQ)Aww~~a LLO		 ""$4d#;<CCED *.):^djjl>S>ST\>]DJJL^D^ 77>>#JS '',,'7B	ww~~)*""	LL$%
z1HP
QQ _s   )$KKc           	         t        j                  | j                  d            j                         }t	        |      }|j                  | d      }|t        j                         5 }t        j                  j                  |d      }t        |d      5 }|j                  |        d d d        t        |||g t        g       }t        |d      5 }|j                  |j!                         | dd      }d d d        d d d        dd l}	|	j$                  j'                  ||      }
|	j$                  j)                  |
      }|
j*                  j-                  |       |S # 1 sw Y   xY w# 1 sw Y   uxY w# 1 sw Y   yxY w)	Nzutf-8z.sozmain.cwrbT)binaryr   )hashlibsha256encode	hexdigestr   get_filetempfileTemporaryDirectoryr   r8   rI   openwriter   include_dirputreadimportlib.utilutilspec_from_file_locationmodule_from_specloaderexec_module)srcr!   keycache
cache_pathtmpdirsrc_pathrX   so	importlibspecmods               r   compile_module_from_srcr      sK   
..G,
-
7
7
9Cc"E4&-J((* 	Lfww||FH5Hh$ hKDBb$ L1"YYqvvxD6dYK
L	L >>11$
CD
..
)
)$
/CKKC J L L	L 	Ls<   (-E5E'(E5&E)5E5E&	"E5)E2	.E55E>c                   $     e Zd Z fdZd Z xZS )HIPUtilsc                 d    t        | d      st        t        |   |       | _        | j                  S )Ninstance)hasattrsuperr   __new__r   )cls	__class__s    r   r   zHIPUtils.__new__   s*    sJ' 37<CL||r   c                    t               }t        t        j                  j	                  t
        d            j                         }|j                  d|d      }t        |d      }|j                  | _	        |j                  | _
        y )Nzdriver.cz/*py_libhip_search_path*/r   	hip_utils)r^   r   r   r8   rI   dirname	read_textreplacer   load_binaryget_device_properties)selflibhip_pathru   r~   s       r   __init__zHIPUtils.__init__   sg    46277<<45??A kk5{AF%c;7??%(%>%>"r   )r   r   r   r   r   __classcell__r   s   @r   r   r      s    
	?r   r   c                 >    | d   dk(  ryddddddd	d
dddddddd|    S )Nr   *hipDeviceptr_tint32_tint8_tint16_tint64_tuint32_tuint8_tuint16_tuint64_tfloatdoublei1i8i16i32i64u1u8u16u32u64fp16bf16fp32f32fp64 tys    r   	ty_to_cppr      sQ    	!u|  	!
 
r   c                    t        |      }dj                  d |j                         D              }d }d }dj                  |j                         D cg c]  } | ||             c}      }	d|	z   }
t        |      dkD  r)ddj                  d |j                         D              z   nd}t	               }|j                         D cg c]	  }|| vs| }}d	| d
t        |      dkD  rd|z   nd ddj                  d |D               d| ddj                  |j                         D cg c]  \  }} ||       d| d c}}       d|
 d| ddj                  |j                         D cg c]  \  }}|d   dk(  rd| d| d| d| d	nd  c}}       dt        |      dkD  r)ddj                  d |j                         D              z   nd d}|S c c}w c c}w c c}}w c c}}w )Nz, c              3   B   K   | ]  \  }}t        |       d |   yw)z argN)r   .0ir   s      r   	<genexpr>z make_launcher.<locals>.<genexpr>   s#     S2Yr]O4s3Ss   c                 >    | d   dk(  ryddddddd	d
dddddddd|    S )Nr   r   	PyObject*r   r   r   r   r   r   r   r   r   r   r   r   r   s    r   _extracted_typez&make_launcher.<locals>._extracted_type   sQ    a5C<
  ! 	r   c                 &    dddddddddd	d
dd|    S )NOrX   rW   lbhr   BHIK)r   r   r   longr   r   r   r   r   r   r   r   r   r   s    r   	format_ofz make_launcher.<locals>.format_of   s:    
  	r    	iiiKKOOOOr   c              3   ,   K   | ]  \  }}d |   yw)z&_argNr   r   s      r   r   z make_launcher.<locals>.<genexpr>   s      LB5 Ls   a;  
#define __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <Python.h>
#include <dlfcn.h>
#include <stdbool.h>
#include <dlfcn.h>

// The list of paths to search for the HIP runtime library. The caller Python
// code should substitute the search path placeholder.
static const char *hipLibSearchPaths[] = {"a  "};

// The list of HIP dynamic library symbols and their signature we are interested
// in this file.
#define HIP_SYMBOL_LIST(FOR_EACH_ERR_FN, FOR_EACH_STR_FN)                     \
  FOR_EACH_STR_FN(hipGetErrorString, hipError_t hipError)                     \
  FOR_EACH_ERR_FN(hipModuleLaunchKernel, hipFunction_t f,                     \
                  unsigned int gridDimX, unsigned int gridDimY,               \
                  unsigned int gridDimZ, unsigned int blockDimX,              \
                  unsigned int blockDimY, unsigned int blockDimZ,             \
                  unsigned int sharedMemBytes, hipStream_t stream,            \
                  void **kernelParams, void **extra)                          \
  FOR_EACH_ERR_FN(hipPointerGetAttribute, void *data,                         \
                  hipPointer_attribute attribute, hipDeviceptr_t ptr)

// The HIP symbol table for holding resolved dynamic library symbols.
struct HIPSymbolTable {
#define DEFINE_EACH_ERR_FIELD(hipSymbolName, ...)                             \
  hipError_t (*hipSymbolName)(__VA_ARGS__);
#define DEFINE_EACH_STR_FIELD(hipSymbolName, ...)                             \
  const char *(*hipSymbolName)(__VA_ARGS__);

  HIP_SYMBOL_LIST(DEFINE_EACH_ERR_FIELD, DEFINE_EACH_STR_FIELD)
};

static struct HIPSymbolTable hipSymbolTable;

bool initSymbolTable() {
  // Use the HIP runtime library loaded into the existing process if it exits.
  void *lib = dlopen("libamdhip64.so", RTLD_NOLOAD);
  if (lib) {
    // printf("[triton] chosen loaded libamdhip64.so in the process\n");
  }

  // Otherwise, go through the list of search paths to dlopen the first HIP
  // driver library.
  if (!lib) {
    int n = sizeof(hipLibSearchPaths) / sizeof(hipLibSearchPaths[0]);
    for (int i = 0; i < n; ++i) {
      void *handle = dlopen(hipLibSearchPaths[i], RTLD_LAZY | RTLD_LOCAL);
      if (handle) {
        lib = handle;
        // printf("[triton] chosen %s\n", hipLibSearchPaths[i]);
      }
    }
  }
  if (!lib) {
    PyErr_SetString(PyExc_RuntimeError, "cannot open libamdhip64.so");
    return false;
  }

  // Resolve all symbols we are interested in.
  dlerror(); // Clear existing errors
  const char *error = NULL;
#define QUERY_EACH_FN(hipSymbolName, ...)                                     \
  *(void **)&hipSymbolTable.hipSymbolName = dlsym(lib, #hipSymbolName);       \
  error = dlerror();                                                          \
  if (error) {                                                               \
    PyErr_SetString(PyExc_RuntimeError,                                       \
                    "cannot query " #hipSymbolName " from libamdhip64.so");   \
    dlclose(lib);                                                             \
    return false;                                                             \
  }

  HIP_SYMBOL_LIST(QUERY_EACH_FN, QUERY_EACH_FN)

  return true;
}

static inline void gpuAssert(hipError_t code, const char *file, int line)
{
   if (code != HIP_SUCCESS)
   {
      const char* prefix = "Triton Error [HIP]: ";
       const char* str = hipSymbolTable.hipGetErrorString(code);
      char err[1024] = {0};
      snprintf(err, 1024, "%s Code: %d, Messsage: %s", prefix, code, str );
      PyErr_SetString(PyExc_RuntimeError, err);
   }
}

#define HIP_CHECK(ans) { gpuAssert((ans), __FILE__, __LINE__); }

static void _launch(int gridX, int gridY, int gridZ, int num_warps, int num_ctas, int clusterDimX, int clusterDimY, int clusterDimZ, int shared_memory, hipStream_t stream, hipFunction_t functionz>) {
  // printf("_launch hip kernel\n");
  void *params[] = { c              3   &   K   | ]	  }d |   yw)z&argNr   )r   r   s     r   r   z make_launcher.<locals>.<genexpr>M  s      <4s <s   zw };
  if (gridX*gridY*gridZ > 0) {
      HIP_CHECK(hipSymbolTable.hipModuleLaunchKernel(function, gridX, gridY, gridZ, aw  *num_warps, 1, 1, shared_memory, stream, params, 0));
    }
  }

typedef struct _DevicePtrInfo {
    hipDeviceptr_t dev_ptr;
    bool valid;
} DevicePtrInfo;

static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {
  DevicePtrInfo ptr_info;
  ptr_info.dev_ptr = 0;
  ptr_info.valid = true;
  if (PyLong_Check(obj)) {
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj);
    return ptr_info;
  }
  if (obj == Py_None) {
    // valid nullptr
    return ptr_info;
  }
  PyObject *ptr = PyObject_GetAttrString(obj, "data_ptr");
  if(ptr){
    PyObject *empty_tuple = PyTuple_New(0);
    PyObject *ret = PyObject_Call(ptr, empty_tuple, NULL);
    Py_DECREF(empty_tuple);
    Py_DECREF(ptr);
    if (!PyLong_Check(ret)) {
      PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
      ptr_info.valid = false;
      return ptr_info;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
    if(!ptr_info.dev_ptr)
      return ptr_info;
    uint64_t dev_ptr;
    hipError_t status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
    if (status == hipErrorInvalidValue) {
        PyErr_Format(PyExc_ValueError,
                     "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
        ptr_info.valid = false;
    }
    ptr_info.dev_ptr = (hipDeviceptr_t)dev_ptr;
    Py_DECREF(ret);
    return ptr_info;
  }
  PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
  return ptr_info;
}

static PyObject* launch(PyObject* self, PyObject* args) {
   // printf("launch\n");
  int gridX, gridY, gridZ;
  uint64_t _stream;
  uint64_t _function;
  PyObject *launch_enter_hook = NULL;
  PyObject *launch_exit_hook = NULL;
  PyObject *kernel_metadata = NULL;
  PyObject *launch_metadata = NULL;
   z _argz; z
  if(!PyArg_ParseTuple(args, "z", &gridX, &gridY, &gridZ, &_stream, &_function,
                                           &kernel_metadata, &launch_metadata,
                                           &launch_enter_hook, &launch_exit_hook a=  )) {
    return NULL;
  }

  // extract kernel metadata
  int num_warps, num_ctas, shared_memory, clusterDimX, clusterDimY, clusterDimZ;
  if (!PyArg_ParseTuple(kernel_metadata, "iiiiii", &num_warps, &num_ctas, &shared_memory, &clusterDimX, &clusterDimY, &clusterDimZ)) {
    return NULL;
  }
  // extract launch metadata
  if (launch_enter_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_enter_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
  }


  // raise exception asap
  r   zDevicePtrInfo ptr_infoz = getPointer(_argz); if (!ptr_infoz.valid) return NULL;z;
  _launch(gridX, gridY, gridZ, num_warps, num_ctas, clusterDimX, clusterDimY, clusterDimZ, shared_memory, (hipStream_t)_stream, (hipFunction_t)_functionc              3   H   K   | ]  \  }}|d    dk(  rd| dnd|   yw)r   r   ptr_infoz.dev_ptr_argNr   r   s      r   r   z make_launcher.<locals>.<genexpr>  s       j~  ch  cd  fh  EG  HI  EJ  LO  EO  mu  vw  ux  x@  kA  W[  \]  [^  U_  k_  j~s    "an  );

  if(launch_exit_hook != Py_None){
    PyObject* args = Py_BuildValue("(O)", launch_metadata);
    PyObject* ret = PyObject_CallObject(launch_exit_hook, args);
    Py_DECREF(args);
    if (!ret)
      return NULL;
  }

  if(PyErr_Occurred()) {
    return NULL;
  }
  // return None
  Py_INCREF(Py_None);
  return Py_None;
}

static PyMethodDef ModuleMethods[] = {
  {"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"},
  {NULL, NULL, 0, NULL} // sentinel
};

static struct PyModuleDef ModuleDef = {
  PyModuleDef_HEAD_INIT,
  "__triton_launcher",
  NULL, //documentation
  -1, //size
  ModuleMethods
};

PyMODINIT_FUNC PyInit___triton_launcher(void) {
  if (!initSymbolTable()) {
    return NULL;
  }
  PyObject *m = PyModule_Create(&ModuleDef);
  if(m == NULL) {
    return NULL;
  }
  PyModule_AddFunctions(m, ModuleMethods);
  return m;
}
)r$   rI   itemsvaluesr^   keys)	constants	signatureids	warp_size
start_desc	arg_declsr   r   r   args_formatformat	args_listr   r   paramsru   s                   r   make_launcherr      s   YJ		SARSSI*  ''IDTDTDVWb9_R%89WXK;&FPST]P^abPbtyy L)//:K LLLhjI02K #)@AQi-?a@F@
- .9M S:Cf X[  \e  Xf  ij  Xj  DH  KT  DT  pr  Cs syy <V <<= >UU^T_ ;`v 88Y__=NOEAr#$E!B/OPQ R  &x (RR[Q\ ]( 99  R[  Ra  Ra  Rc  d  IN  IJ  LNoqrsotx{o{&qc);A3bCSTUSVVjk  BD  D  d  e  f fY BE  FO  BP  ST  BT  Z^  ae  aj  aj  j~  lu  l{  l{  l}  j~  a~  Z~  Z\  Y] *]i^C~ JO X Az P. ds   G	G#G#/G(4#G.(c                       e Zd Zd Zd Zy)HIPLauncherc                    dt        d      rj                  j                  n	t               i}t        d      rj                  n	t               }fd}|j                         D ci c]  \  }} ||      | }}}j                  j                         D ci c]  \  }} ||      | }}}t        ||||j                        t        d      }	|	j                  | _        y c c}}w c c}}w )Nids_of_const_exprsfnr   c                 r    t        | t              r%j                  j                  j	                  |       S | S N)
isinstancestrr   	arg_namesindex)r   ru   s    r   <lambda>z&HIPLauncher.__init__.<locals>.<lambda>  s*    As9KCFF,,2215 QR r   __triton_launcher)r   r   
constexprstupler   dictr   r   r   r   r   launch)
r   ru   metadatar   r   cst_keyrv   valuer   r~   s
    `        r   r   zHIPLauncher.__init__  s    #'#t:LSVV%6%6RWRYZ%,S+%>CMMDF	R;D??;LMZS%WS\5(M	M;>==;N;N;PQZS%WS\5(Q	QIy#x7I7IJ%c+>?jj	 NQs   *C)C/c                 (     | j                   |i | y r   )r   )r   argskwargss      r   __call__zHIPLauncher.__call__  s    T$V$r   N)r   r   r   r   r   r   r   r   r   r     s    !%r   r   c                   4     e Zd Z fdZed        Zd Z xZS )	HIPDriverc                 V    t         |           t               | _        t        | _        y r   )r   r   r   utilsr   launcher_cls)r   r   s    r   r   zHIPDriver.__init__  s    Z
'r   c                  :    dd l } | j                  j                  d uS )Nr   )r<   versionhip)r<   s    r   	is_activezHIPDriver.is_active  s    }}  ,,r   c                     | j                         }| j                  j                  |      }|d   }|d   }t        d|j	                  d      d   |      S )NarchwarpSizer   r?   r   )get_current_devicer   r   r   rK   )r   devicedevice_propertiesr  r   s        r   get_current_targetzHIPDriver.get_current_target  sU    ((* JJ<<VD (%j1	

3 2I>>r   )r   r   r   r   staticmethodr   r  r   r   s   @r   r   r     s!    (
 - -?r   r   )	functoolsr   rc   rL   rh   pathlibr   triton.runtime.buildr   triton.runtime.cacher   triton.backends.compilerr   triton.backends.driverr   r8   r   realpath__file__rI   rl   r9   	lru_cacher^   r   objectr   r   r   r   r   r   r   r   <module>r     s     	     ' 2 . ,
''//"''**84
5ww||GY/0-` ;R ;R|&?v ?(
,Qh%& % ?	 ?r   