
    +sg|                        d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ  ej                  e	      Z
 G d dej                        Zy)    )annotationsN)Tensornnc                  J     e Zd ZdZdd fdZd	dZd Zd Zed        Z	 xZ
S )
WordWeightszDThis model can weight word embeddings, for example, with idf-values.c                   t         |           g d| _        || _        || _        || _        g }d}|D ]J  }|}||v r||   }n+|j                         |v r||j                            }n|dz  }|j                  |       L t        j                  | dt        |       d|        t        j                  t        |      d      | _        | j                  j                  dt        j                   |      j#                  d      i       y)aZ  
        Initializes the WordWeights class.

        Args:
            vocab (List[str]): Vocabulary of the tokenizer.
            word_weights (Dict[str, float]): Mapping of tokens to a float weight value. Word embeddings are multiplied
                by this float value. Tokens in word_weights must not be equal to the vocab (can contain more or less values).
            unknown_word_weight (float, optional): Weight for words in vocab that do not appear in the word_weights lookup.
                These can be, for example, rare words in the vocab where no weight exists. Defaults to 1.
        )vocabword_weightsunknown_word_weightr      z of z0 words without a weighting value. Set weight to weightN)super__init__config_keysr	   r
   r   lowerappendloggerinfolenr   	Embedding	emb_layerload_state_dicttorchFloatTensor	unsqueeze)	selfr	   r
   r   weightsnum_unknown_wordswordr   	__class__s	           [/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/models/WordWeights.pyr   zWordWeights.__init__   s    	K
(#6  	#D(F|#%d+-%djjl3!Q&!NN6"	# 	 !c%j\1abuavw	
 c%j!4&&%2C2CG2L2V2VWX2Y'Z[    c                @   |d   }|d   }| j                  |d         j                  d      }||j                         z  }t        j                  |d      }|j                  d      j                  |j                               }||z  }|j                  ||d       |S )Nattention_masktoken_embeddings	input_idsr   )r%   token_weights_sum)	r   squeezefloatr   sumr   expandsizeupdate)r   featuresr$   r%   token_weights_rawtoken_weightsr(   token_weights_expandeds           r!   forwardzWordWeights.forward4   s    !"23#$67 !NN8K+@AII"M)N,@,@,BB!IImQ7 "/!8!8!<!C!CDTDYDYD[!\+.DD-=Tefgr"   c                \    | j                   D ci c]  }|| j                  |    c}S c c}w )N)r   __dict__)r   keys     r!   get_config_dictzWordWeights.get_config_dictD   s*    373C3CDCT]]3''DDDs   )c                    t        t        j                  j                  |d      d      5 }t	        j
                  | j                         |d       d d d        y # 1 sw Y   y xY w)Nconfig.jsonw   )indent)openospathjoinjsondumpr7   )r   output_pathfOuts      r!   savezWordWeights.saveG   sK    "'',,{M:C@ 	>DIId**,d1=	> 	> 	>s   'AA$c                    t        t        j                  j                  | d            5 }t	        j
                  |      }d d d        t        di S # 1 sw Y   xY w)Nr9    )r=   r>   r?   r@   rA   loadr   )
input_pathfInconfigs      r!   rH   zWordWeights.loadK   sM    "'',,z=9: 	$cYYs^F	$ $V$$	$ 	$s   AA)r   )r	   z	list[str]r
   zdict[str, float]r   r*   )r/   zdict[str, Tensor])__name__
__module____qualname____doc__r   r3   r7   rE   staticmethodrH   __classcell__)r    s   @r!   r   r      s1    N"\H E> % %r"   r   )
__future__r   rA   loggingr>   r   r   r   	getLoggerrL   r   Moduler   rG   r"   r!   <module>rV      s;    "   	  			8	$C%")) C%r"   