
    sgw                        d dl Z d dlZd dlZd dlZd dlZd dlmc mc mZ	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZmZmZmZ d dlm Z m!Z! d dl"m#Z# dd	l$m%Z%m&Z& g d
Z'eZ(ejR                  ejT                  jR                  ejV                  ejT                  jV                  iejT                  jR                  ej                  jR                  ejT                  jV                  ej                  jV                  idZ,d Z-	 	 	 d!dZ.d"dZ/d Z0d Z1d#dZ2	 	 	 	 d$dZ3d Z4d Z5	 	 	 	 d%dZ6d Z7d Z8d&dZ9dejt                  ddfdZ;d&dZ<d#dZ=	 	 	 	 	 	 d'dZ>	 	 	 	 	 d(dZ?	 d#dZ@d)d ZAy)*    N)_FusedModule)_is_activation_post_process)_activation_is_memoryless_add_module_to_qconfig_obs_ctrdefault_dynamic_qconfigfloat16_dynamic_qconfig!float_qparams_weight_only_qconfig&float_qparams_weight_only_qconfig_4bit)_get_special_act_post_process_has_special_act_post_process)get_default_dynamic_quant_module_mappingsget_default_qat_module_mappings$get_default_qconfig_propagation_list(get_default_static_quant_module_mappings2get_default_static_quant_reference_module_mappingsno_observer_set)DeQuantStubQuantWrapper)type_before_parametrizations   )get_qparam_dict)has_no_children_ignoring_parametrizations)
get_default_custom_config_dictpropagate_qconfig_add_quant_dequantpreparequantizequantize_dynamicprepare_qatquantize_qatconvertswap_module)%float_to_observed_custom_module_class)observed_to_quantized_custom_module_classc                      t         S )z'Defines the default custom config dict.)_DEFAULT_CUSTOM_CONFIG_DICT     Q/var/www/html/venv/lib/python3.12/site-packages/torch/ao/quantization/quantize.pyr   r   B   s    &&r(   c                    |j                  t        |       |      }|j                  ||      }t        | d|      }t        j                  j
                  j                  j                  ||        t        ||       }|| _        | j                         D ]T  \  }}|r|dz   |z   n|}	|3||j                  dg       v r)t        |      |j                  dg       v rGt        ||||	       V y)a  This is a helper function for `propagate_qconfig_`

    Args:
        module: input module
        qconfig_dict: dictionary that maps from name of submodule to quantization
                     configuration
        qconfig_parent: quantization config of parent module, we will fallback to
                       this config when there is no specified config for current
                       module
        prefix: corresponding prefix of the current module, used as key in
                qconfig_dict
        prepare_custom_config_dict: dictionary for custom handling of modules
                                    see docs for :func:`~torch.ao.quantization.prepare_fx`

    Return:
        None, module is modified inplace with qconfig attached
    qconfig.Nnon_traceable_module_namenon_traceable_module_class)getr   getattrtorchaoquantizationr+   _assert_valid_qconfigr   named_childrentype_propagate_qconfig_helper)
moduleqconfig_dictqconfig_parentprefixprepare_custom_config_dictmodule_qconfigqconfig_with_device_checknamechildmodule_prefixs
             r)   r7   r7   G   s    2 "%%$V,nN "%%fn=NVY?N	HH!!77O >~v V.FN,,. 
e/5t+4%-.223NPRSSE{)--.JBOP &|%>
r(   c                 0    |i }|i }t        | ||       y)a  Propagate qconfig through the module hierarchy and assign `qconfig`
    attribute on each leaf module

    Args:
        module: input module
        qconfig_dict: dictionary that maps from name or type of submodule to
            quantization configuration, qconfig applies to all submodules of a
            given module unless qconfig for the submodules are specified (when
            the submodule already has qconfig attribute)
        prepare_custom_config_dict: dictionary for custom handling of modules
            see docs for :func:`~torch.ao.quantization.prepare_fx`

    Return:
        None, module is modified inplace with qconfig attached
    N)r<   )r7   )r8   r9   r<   s      r)   r   r   x   s+      !)%'"9Sr(   c                 $    | j                  |      S )z.Forward hook that calls observer on the outputactivation_post_process)selfinputoutputs      r)   _observer_forward_hookrI      s    ''//r(   c                 *    | j                  |d         S )z2Forward pre hook that calls observer on the outputr   rD   )rF   rG   s     r)   _observer_forward_pre_hookrK      s    ''a11r(   Fc                     t        | d      sJ d       |r| j                  t        d      }y | j                  t        d      }y )NrE   zGExpect activation_post_process attribute already attached to the moduleT)prepend)hasattrregister_forward_pre_hookrK   register_forward_hookrI   )r8   pre_hookhandles      r)   &_register_activation_post_process_hookrS      s[    ) QPQ  11& 2 
 --.Dd-Sr(   c                    |
t               }|i }Gt        |       }t        |      dk  s
J d|        t        |      dkD  rt        t	        |            ndddd dfd	}| j                         D ]m  \  }}t        |      t        j                  fv r$t        t        |      t        j                  t        j                  f      rB |      s`t        |d      sJ d	t        |       d
        |j                        |_        t!        |t"              r |      s ||       |t        |      |v r |      s ||       t%        |      rt'        |      }	 |||	        |      r[t        |      |v rN|t        |         j)                  |      }
t+        | ||
       |t        |         t-               vsU ||
       _t/        ||||       p t1        |       r9t!        | t2        j                  j4                        st        |       |v r ||        t        | d      r<t!        | t2        j                  j4                        st        |       |v r	 ||        yyyy)as  Add observer for the leaf child of the module.

    This function insert observer module to all leaf child module that
    has a valid qconfig attribute.

    Args:
        module: input module with qconfig attributes for all the leaf modules that we want to quantize
        qconfig_propagation_list: a list of quantizable modules that will have observers added to them
            if they are leaf nodes
        device: parent device, if any
        non_leaf_module_list: list of non-leaf modules we want to add observer

    Return:
        None, module is modified inplace with added observer modules and forward_hooks
    Nr   zR_add_observer_ only works with cpu or single-device CUDA modules, but got devices r   c                 ^    || j                         n |       }||j                  |       |S N)
activationto)r+   devicespecial_act_post_processrW   s       r)   get_activation_post_processz3_add_observer_.<locals>.get_activation_post_process   s=     (/  )+ 	
 MM&!r(   c                 :    t        | d      xr | j                  d uS )Nr+   rN   r+   )ms    r)   needs_observationz)_add_observer_.<locals>.needs_observation   s    q)$>$)>>r(   c                      |       rVt        | t              sE| j                  d | j                  |             t	        | t        | j                               yyy)zmAdds an activation post process module and register
        a pre or post hook that calls the module
        rE   rQ   N)
isinstancer   
add_moduler+   rS   r   )r^   rZ   rY   r[   r_   s     r)   insert_activation_post_processz6_add_observer_.<locals>.insert_activation_post_process   s[    
 Q
1k(BLL)+IIv'? 35aii@ )Cr(   rE   zfunctional class z- has no pre-defined `activation_post_process`weight_fake_quantrV   )r   _get_unique_devices_lennextiterr5   r   nnDropout
issubclassnnqFloatFunctionalQFunctionalrN   r+   rE   rb   r   r   r   
from_floatsetattrr   _add_observer_r   r1   
Sequential)r8   qconfig_propagation_listnon_leaf_module_listrY   custom_module_class_mappingdevicesrd   r?   r@   rZ   observed_childr[   r_   s      `       @@r)   rr   rr      s   ,  '#G#I "*&(# ~&v.LA	j_`g_hi	j(+Gq(8d7m$d?& ,,. 1e'.2::,>(/#2E2Es1W
 !'4 z&'CE'J&KKxyz  1LMM61- |, '.u5 ,,U37KK '.u5*51'DU'K$*52JKe$,U37RR8,U3j  FD.1 ,,H,OP&() /~>($+W1l 	2&96588#6#67(04LL&v. 	+,6588#6#67(04LL&v. M 8 	-r(   c                     | j                         D ch c]  }|j                   c}| j                         D ch c]  }|j                   c}z  S c c}w c c}w rV   )
parametersrY   buffers)r8   ps     r)   rf   rf   2  sM    $//12AHH2 ..*66  2 6s
   AAc                     t        |       r#t        | d      r| j                  rt        |       S | j	                         D ]  \  }}t        |      | j                  |<    | S )a{  Wrap the leaf child module in QuantWrapper if it has a valid qconfig
    Note that this function will modify the children of module inplace and it
    can return a new module which wraps the input module as well.

    Args:
        module: input module with qconfig attributes for all the leaf modules
        that we want to quantize

    Return:
        Either the inplace modified module with submodules wrapped in
        `QuantWrapper` based on qconfig or a new `QuantWrapper` module which
        wraps the input module, the latter case only happens when the input
        module is a leaf module and we want to quantize it.
    r+   )r   rN   r+   r   r5   r   _modules)r8   r?   r@   s      r)   r   r   8  s\      	2&9FI&NNF##,,. 9e 1% 89Mr(   c                 l   t         j                  j                  d       |
t               }|j	                  di       }|st        j                  |       } |}|
t               }t        | d       t        d | j                         D              st        j                  d       t        | |||       | S )a  Prepares a copy of the model for quantization calibration or quantization-aware training.

    Quantization configuration should be assigned preemptively
    to individual submodules in `.qconfig` attribute.

    The model will be attached with observer or fake quant modules, and qconfig
    will be propagated.

    Args:
        `model`: input model to be modified in-place
        `inplace`: carry out model transformations in-place, the original module is mutated
        `allow_list`: list of quantizable modules
        `observer_non_leaf_module_list`: list of non-leaf modules we want to add observer
        `prepare_custom_config_dict`: customization configuration dictionary for prepare function

    .. code-block:: python

       # Example of prepare_custom_config_dict:
       prepare_custom_config_dict = {
           # user will manually define the corresponding observed
           # module class which has a from_float class method that converts
           # float custom module to observed custom module
           "float_to_observed_custom_module_class": {
               CustomModule: ObservedCustomModule
           }
        }

    z!quantization_api.quantize.prepareNr#   r9   c              3   P   K   | ]  }t        |d       xr |j                     yw)r+   Nr]   ).0r^   s     r)   	<genexpr>zprepare.<locals>.<genexpr>  s#     Lqwq)$22Ls   $&zNone of the submodule got qconfig applied. Make sure you passed correct configuration through `qconfig_dict` or by assigning the `.qconfig` attribute directly on submodules)rv   )r1   _C_log_api_usage_oncer   r/   copydeepcopyr   r   anymoduleswarningswarnrr   )modelinplace
allow_listobserver_non_leaf_module_listr<   rv   rt   s          r)   r   r   S  s    F 
HH  !DE!)%C%E""<"@"@/# e$  *#G#I u40 LEMMOLLK	
  %$?	 Lr(   c                      t         d      r!t         j                        rt         d       d fd	} |d        |d       y )NrE   Fc                     | rj                   nj                  }| rt        nt        }t	               }|j                         D ]  \  }}||u s|j                  |        |D ]  }|j                  |        y rV   )_forward_pre_hooks_forward_hooksrK   rI   setitemsaddpop)rQ   hook_mapobserver_hookhandle_ids_to_remove	handle_idhook_fnr8   s         r)   remove_hooksz5_remove_activation_post_process.<locals>.remove_hooks  s~    086,,f>S>S*2&8N 	  #u"*.."2 	4Iw-'$((3	4 . 	$ILL#	$r(   Tra   F)rN   r   rE   delattr)r8   r   s   ` r)   _remove_activation_post_processr     sE     v016Q&&7 	12
$ $% r(   c                 v    | j                         D ]  }t        |        t        | d      r| `t	        |        y)zClean up the qconfig left in the module so that new qconfig can be
    propagated.

    Args:
        module: module to be cleaned up
    r+   N)children_remove_qconfigrN   r+   r   )r8   r@   s     r)   r   r     s;     "  vy!N#F+r(   c                     t         j                  j                  d       |
t               }|st	        j
                  |       } | j                          t        | d        || g|  t        | |d       | S )a  Quantize the input float model with post training static quantization.

    First it will prepare the model for calibration, then it calls
    `run_fn` which will run the calibration step, after that we will
    convert the model to a quantized model.

    Args:
        model: input float model
        run_fn: a calibration function for calibrating the prepared model
        run_args: positional arguments for `run_fn`
        inplace: carry out model transformations in-place, the original module is mutated
        mapping: correspondence between original module types and quantized counterparts

    Return:
        Quantized model.
    z"quantization_api.quantize.quantizeTr   )	r1   r   r   r   r   r   evalr   r!   )r   run_fnrun_argsmappingr   s        r)   r   r     sf    " 
HH  !EF:<e$	JJLE4 
58E7D)Lr(   c                 p   t         j                  j                  d       ||t         j                  k(  r|t        j
                  t        t        j                  t        t        j                  t        t        j                  t        t        j                  t        t        j                  t        i}n|t         j                  k(  r|t        j
                  t        t        j                  t        t        j                  t        t        j                  t        t        j                  t        t        j                  t        i}n(|t         j                  k(  r+t        j                  t         t        j"                  t         i}n|t         j$                  k(  rt        j                  t&        i}nt)        d| d      t+        |t,              r|t         j                  u rt        }n`|t         j                  u rt        }nG|t         j                  u rt         }n.|t         j$                  u rt&        }nt/        dt1        |            t3        t5        |t7        j8                  |                  }|
t;               }|st=        j>                  |       } | jA                          tC        | |       tE        | |d       | S )av  Converts a float model to dynamic (i.e. weights-only) quantized model.

    Replaces specified modules with dynamic weight-only quantized versions and output the quantized model.

    For simplest usage provide `dtype` argument that can be float16 or qint8. Weight-only quantization
    by default is performed for layers with large weights size - i.e. Linear and RNN variants.

    Fine grained control is possible with `qconfig` and `mapping` that act similarly to `quantize()`.
    If `qconfig` is provided, the `dtype` argument is ignored.

    Args:
        model: input model
        qconfig_spec: Either:

            - A dictionary that maps from name or type of submodule to quantization
              configuration, qconfig applies to all submodules of a given
              module unless qconfig for the submodules are specified (when the
              submodule already has qconfig attribute). Entries in the dictionary
              need to be QConfig instances.

            - A set of types and/or submodule names to apply dynamic quantization to,
              in which case the `dtype` argument is used to specify the bit-width

        inplace: carry out model transformations in-place, the original module is mutated
        mapping: maps type of a submodule to a type of corresponding dynamically quantized version
            with which the submodule needs to be replaced

    z*quantization_api.quantize.quantize_dynamicz5Don't know how to quantize with default settings for z. Provide full qconfig pleasez.Unknown dtype specified for quantize_dynamic: Tr   )#r1   r   r   qint8rj   Linearr   LSTMGRULSTMCellRNNCellGRUCellfloat16r   quint8EmbeddingBagr	   	Embeddingquint4x2r
   
ValueErrorrb   r   RuntimeErrorstrdictzip	itertoolsrepeatr   r   r   r   r   r!   )r   qconfig_specdtyper   r   default_qconfigs         r)   r   r     s   > 
HH  !MNEKK		20/4

3

3L emm#		20/4

3

3L ell"!B?L enn$!GL GwNkl  
L#	&EKK5Oemm#5Oell"?Oenn$DO@#e*  Ci.>.>.OPQ;=e$	JJLul+E7D)Lr(   c                 2   t         j                  j                  d       | j                  sJ d       |
t	               }|st        j                  |       } t        | d       t        | |dd       t        | t        |j                               d       | S )	a  
    Prepares a copy of the model for quantization calibration or
    quantization-aware training and converts it to quantized version.

    Quantization configuration should be assigned preemptively
    to individual submodules in `.qconfig` attribute.

    Args:
        model: input model to be modified in-place
        mapping: dictionary that maps float modules to quantized modules to be
                 replaced.
        inplace: carry out model transformations in-place, the original module
                 is mutated
    z%quantization_api.quantize.prepare_qatz1prepare_qat only works on models in training modeNr   TF)r   r   remove_qconfig)r   r   )r1   r   r   trainingr   r   r   r   r!   r   r   values)r   r   r   s      r)   r   r   8  s}     
HH  !HI>>NNN>13e$u40E7DGEW^^5E1FPTULr(   c                     t         j                  j                  d       |st        j                  |       } | j                          t        | d        || g|  t        | d       | S )ag  Do quantization aware training and output a quantized model

    Args:
        model: input model
        run_fn: a function for evaluating the prepared model, can be a
                function that simply runs the prepared model or a training
                loop
        run_args: positional arguments for `run_fn`

    Return:
        Quantized model.
    z&quantization_api.quantize.quantize_qatTr   )r1   r   r   r   r   trainr   r!   )r   r   r   r   s       r)   r    r    U  sW     
HH  !IJe$	KKMt$
58E4 Lr(   c                     t         j                  j                  d       |st        j                  |       } t        | |d|||       |rt        |        | S )a  Converts submodules in input module to a different module according to `mapping`
    by calling `from_float` method on the target module class. And remove qconfig at the
    end if remove_qconfig is set to True.

    Args:
        `module`: prepared and calibrated module
        `mapping`: a dictionary that maps from source module type to target
                   module type, can be overwritten to allow swapping user defined
                   Modules
        `inplace`: carry out model transformations in-place, the original module
                   is mutated
        `convert_custom_config_dict`: custom configuration dictionary for convert function
        `use_precomputed_fake_quant`: a flag to enable use of precomputed fake quant

    .. code-block:: python

       # Example of convert_custom_config_dict:
       convert_custom_config_dict = {
           # user will manually define the corresponding quantized
           # module class which has a from_observed class method that converts
           # observed custom module to quantized custom module
           "observed_to_quantized_custom_module_class": {
               ObservedCustomModule: QuantizedCustomModule
           }
       }

    z!quantization_api.quantize.convertT)r   is_referenceconvert_custom_config_dictuse_precomputed_fake_quant)r1   r   r   r   r   _convertr   )r8   r   r   r   r   r   r   s          r)   r!   r!   l  sU    H 
HH  !DEv&!#=#= Mr(   c           	         ||r
t               n	t               }|
t               }|j                  di       }|st	        j
                  |       } i }| j                         D ]D  \  }}	t        |	t              st        |	      |vrt        |	|d|||       t        |	|||      ||<   F |j                         D ]  \  }
}|| j                  |
<    | S )ao  Converts submodules in input module to a different module according to `mapping`
    by calling `from_float` method on the target module class

    Args:
        module: input module
        mapping: a dictionary that maps from source module type to target
                 module type, can be overwritten to allow swapping user defined
                 Modules
        inplace: carry out model transformations in-place, the original module
                 is mutated
        is_reference: a flag to enable quantized reference module
        use_precomputed_fake_quant: a flag to enable use of precomputed fake quant

    r$   Tr   )r   r   r   r/   r   r   r5   rb   r   r   r   r"   r   r~   )r8   r   r   r   r   r   rv   reassignr?   modkeyvalues               r)   r   r     s    ,   ?@9; 	
 ")%C%E""<"@"@3R# v&H**, 
	c 3-,S19TT*+E %57Q

& nn& %
U$% Mr(   c                    | }t        | d      r| j                  d}t        |       |v r |t        |          j                  |       }d}nt        |       |v r|t        |          }t        |d      rd|j                  rX| j                  J | j                  j                         } || j
                         t        |      }|j                  | |      }nRt        j                  |j                        }	d|	j                  v r|j                  | |      }n|j                  |       }d}|r| j                  j                         D ]  }
|j                  |
        | j                  j                         D ]  }|t        us|j!                  |        t#        |       }t%        |      dk  s
J d	|        t%        |      d
kD  rt'        t)        |            nd}|r|j+                  |       |S )a	  Swaps the module if it has a quantized counterpart and it has an
    `observer` attached.

    Args:
        mod: input module
        mapping: a dictionary that maps from nn module to nnq module

    Return:
        The corresponding quantized module of `mod`
    r+   NFT_IS_REFERENCEr   r   r   zOswap_module only works with cpu or single-device CUDA modules, but got devices r   )rN   r+   r   from_observedr   weightr   rp   inspect	signaturerz   r   r   rO   r   rI   rP   rf   rg   rh   ri   rX   )r   r   rv   r   new_modswappedqmodweight_post_processweight_qparamssigpre_hook_fnr   rw   rY   s                 r)   r"   r"     s    GsI3;;#:',0KK1,S1mC   G)#.'97<=Dt_-$2D2D{{...&)kk&8&8&:##CJJ/!01D!E//#~>''8/3>>A"oo8R . G #ooc2GG"55<<> ?11+>? --446 ;"8811':;
 +3/GG!k`ah`ijk!,/L1,<T$w-($F

6"Nr(   c                     d }t        | d      r| j                  | ||      dz   <   | j                         D ]!  \  }}|r ||      |z   n|}t        |||       # y)a,  Traverse the modules and save all observers into dict.
    This is mainly used for quantization accuracy debug
    Args:
        mod: the top module we want to save all observers
        prefix: the prefix for the current module
        target_dict: the dictionary used to save all the observers
    c                     | dk(  r| S | dz   S )N r,   r'   )r;   s    r)   
get_prefixz&_get_observer_dict.<locals>.get_prefix#  s    2v76C<7r(   rE   N)rN   rE   r5   _get_observer_dict)r   target_dictr;   r   r?   r@   rA   s          r)   r   r     st    8 s-. '' 	v!::	
 ))+ >e5;
6*T15+}=>r(   )Nr   N)NNr   )NNNN)FNNN)NF)NFTFNF)NFFNF)r   )Br   r   r   r   r1   torch.ao.nn.quantizedr2   rj   	quantizedrm   torch.nntorch.ao.nn.intrinsicr   torch.ao.quantization.observerr   torch.ao.quantization.qconfigr   r   r   r   r	   r
   +torch.ao.quantization.quantization_mappingsr   r   r   r   r   r   r   r   torch.ao.quantization.stubsr   r   torch.nn.utils.parametrizer   utilsr   r   __all__is_activation_post_processr   quantizableMultiheadAttentionr&   r   r7   r   rI   rK   rS   rr   rf   r   r   r   r   r   r   r   r   r    r!   r   r"   r   r'   r(   r)   <module>r      s        # #  . F 	 	 	 B C M 9 
 	$$
r~~@@.
 	R\\..
))2<<+J+J2	 ' #.b20
2
	T " $H/V: "&#AH!4, < EKKuWt:2 #$1l #$;~ KP9x>r(   