
    sg                         d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlmZmZmZmZ d dlZd dlmZ ej,                  j/                  ed      Zd	Z ed
      Z ed      Zde
ee	ef   ef   de
ee	ef   ef   fdZde
ee	ef   ef   de
ee	ef   ef   fdZ G d d      Z G d de      Z  e        Z!y)    N)cached_propertywraps)chain)median)AnyCallableDictListTuple)Concatenate	ParamSpecSelfTypeVar)countersbenchmarkingi  PTfnreturnc           	           t         j                  j                  j                  j	                  d      s S t               dt        dt        j                  dt        j                  dt        f fd       }|S )aY  Wrapper that logs the duration of `fn`, in milliseconds, along with a representation
    of the function's args and kwargs, if logging is enabled. It is expected that `fn` is
    a method of `Benchmarker` or one of its subclasses; typing limitations prevent us from
    declaring this directly. If logging is disabled, this becomes a no-op.
    r   selfargskwargsr   c           	          t        j                         } |i |}t        j                  d| j                  j
                  j
                  ||t        j                         |z
  t        z         |S )NzFCall `benchmarking.%s.%s(*args=%r, **kwargs=%r)` took %f milliseconds.)timeperf_counterloggerdebug	__class____name__MILLISECONDS_PER_SECOND)r   r   r   start_tresultr   s        W/var/www/html/venv/lib/python3.12/site-packages/torch/_inductor/runtime/benchmarking.pywrapperzmaybe_time.<locals>.wrapper"   sg    ##%T$V$TNN##KK 7*.EE	
     )torch_logging	_internal	log_stateis_artifact_enabledr   r   r   r   r   r   r   r%   s   ` r$   
maybe_timer-      si     >>##--AA.Q	
2Yc !&& AHH    Nr&   c           	           t               dt        dt        j                  dt        j                  dt
        f fd       }|S )a  Wrapper that increments relevant dynamo counters on `fn` call. It is expected that
    `fn` is a method of `Benchmarker` or one of its subclass; typing limitations prevent
    us from declaring this directly. The counter incrementation follows the formula,

    `counters["inductor"]["benchmarking.Foo.bar] += 1`

    where `Foo` is the class whose' instance called the function, and `bar` is the function name.
    r   r   r   r   c                     t         d   d| j                  j                  z   dz   j                  z   xx   dz  cc<    | g|i |S )Ninductorzbenchmarking..   )r   r   r    )r   r   r   r   s      r$   r%   zcount.<locals>.wrapper=   sT    dnn555;bkkI	
	 	
 $((((r&   )r   r   r   r   r   r   r,   s   ` r$   countr3   3   sF     2Y)c )!&& )AHH ) ) ) Nr&   c                       e Zd ZdeddfdZeedededef   de	e   de
eef   d	edefd
              Zee	 ddedeg ef   dededef
d              Zededed	edefd       Zy)Benchmarkerr   r   Nc                      y N )r   s    r$   __init__zBenchmarker.__init__H   s    r&   r   .fn_args	fn_kwargsr   c                 v   d}t        j                               D ]F  }t        |t        j                        s||j
                  }-|j
                  |k7  s=t        d       |t        d      fd}|t        j
                  d      k(  r | j                  |fi |S  | j                  |fi |S )a  Benchmark `fn(*fn_args, *fn_kwargs)` and return the runtime, in milliseconds (the
        actual runtime calculation is dictated by the benchmarking implementation, but may be
        one of [mean, median, minimum, etc.]). Functions as a convenience wrapper around
        device-specific implementations, like `benchmark_cpu` and `benchmark_gpu`. Raises
        `ValueError(...)` if we can't safely infer the device type of `fn`; for example,
        if multiple device types are found in `fn_args` and `fn_kwargs`, or if no device
        types are found.

        Arguments:
        - fn: The function to benchmark.
        - fn_args: The function's arguments.
        - fn_kwargs: The function's kwargs.

        Keyword Arguments:
        - **kwargs: The benchmarking implementation's kwargs.

        Returns:
        - The runtime of `fn(*fn_args, **fn_kwargs)`, in milliseconds.
        NzcCan't safely infer the device type of `fn` with multiple device types in `fn_args` and `fn_kwargs`!zCan't safely infer the device type of `fn` with no device types in `fn_args` or `fn_kwargs`! You should be calling `.benchmark_cpu` or `.benchmark_gpu` directly.c                        i S r7   r8   )r   r:   r;   s   r$   <lambda>z'Benchmarker.benchmark.<locals>.<lambda>u   s    B595 r&   cpu)	r   values
isinstancer'   Tensordevice
ValueErrorbenchmark_cpubenchmark_gpu)r   r   r:   r;   r   inferred_devicearg_or_kwarg	_callables    ```    r$   	benchmarkzBenchmarker.benchmarkK   s    8 !'9+;+;+=> 	LlELL9&"."5"5$$7 y 	 " t  6	ell511%4%%i:6:: "t!!)6v66r&   rI   warmuprepc                 h    dt         dt        t           ffd} ||       t         ||            S )a  Benchmark the CPU callable, `_callable`, and return the median runtime,
        in milliseconds.

        Arguments:
        - _callable: The CPU callable to benchmark.

        Keyword Arguments:
        - warmup: Optionally, the duration, in milliseconds, to run `_callable`
        before benchmarking starts.
        - rep: Optionally, the duration, in milliseconds, to run `_callable`
        during benchmarking.

        Returns:
        - The median runtime of `_callable`, in milliseconds.
        msr   c                     g }t        j                         }	 t        j                         }         t        j                         }|j                  ||z
  t        z         ||z
  t        z  | kD  r	 |S ]r7   )r   r   appendr!   )rN   timingsrun_start_tr"   end_trI   s        r$   run_forz*Benchmarker.benchmark_cpu.<locals>.run_for   sp    G++-K++-))+3JJK[(,CCrIN r&   )intr
   floatr   )r   rI   rK   rL   rT   s    `   r$   rE   zBenchmarker.benchmark_cpu}   s2    *
	 
	U 
	 	gcl##r&   r   c                     t         r7   )NotImplementedError)r   r   r   s      r$   rF   zBenchmarker.benchmark_gpu   s    !!r&   )   d   )r    
__module____qualname__r   r9   r-   r3   r   r   r   r	   strrV   rJ   rU   rE   rF   r8   r&   r$   r5   r5   G   s    t   
.7.7S#X.7 s.7 S>	.7
 .7 
.7  .7` 
OR $ $'C0 $:= $IL $	 $   $D "D " " " " "r&   r5   c            
       z    e Zd Zeeedededef   fd                     Z	eededeg ef   dede
fd              Zy)	TritonBenchmarkerr   r   .c                 N    	 ddl m} |S # t        $ r}t        d      |d}~ww xY w)z"Lazily import Triton's `do_bench`.r   )do_benchzrequires TritonN)triton.testingra   ImportErrorrX   )r   ra   es      r$   triton_do_benchz!TritonBenchmarker.triton_do_bench   s4    
	@/   	@%&78a?	@s   
 	$$rI   r   c                     d|v r | j                   |fi |d   S d|v r | j                   |fi |S  | j                   |fi |ddiS )a  Benchmark the GPU callable, `_callable`, and return the runtime, in milliseconds.

        Arguments:
        - _callable: The GPU callable to benchmark.

        Keyword Arguments:
        - quantiles: Optionally, a tuple of floats denoting the requested quantiles.
        - return_mode: Optionally, the requested return mode. Currently, Triton's
        `do_bench` supports min, max, mean, and median return modes.
        - **kwargs: Additional kwargs passed to Triton's `do_bench`.

        Returns:
        - The runtime of `callable`, in milliseconds. If `kwargs["quantiles"]` is specified,
        this is the first requested quantile. Else, if `kwargs["return_mode"]` is specified,
        this is the requested return mode. Otherwise, this is the median.
        	quantilesr   return_moder   )re   )r   rI   r   s      r$   rF   zTritonBenchmarker.benchmark_gpu   sf    & & '4''	<V<Q??f$'4''	<V<<#t##INNXNNr&   N)r    r[   r\   r   r-   r3   r   r   r   re   rV   rF   r8   r&   r$   r_   r_      s    
d xS'9     
OD OXb#g-> O# ORW O  Or&   r_   )"r   	functoolsr   r   	itertoolsr   
statisticsr   typingr   r   r	   r
   r   typing_extensionsr   r   r   r   r'   torch._dynamo.utilsr   r(   getArtifactLoggerr    r   r!   r   r   r-   r3   r5   r_   benchmarkerr8   r&   r$   <module>rq      s     ,   3 3 C C  ( 
	)	)(N	C  cNCLS!V$a'(k#q&!1$%<h{36*A-. 8KQ<OQR<R3S (\" \"~#O #OL  !r&   