
    +sg0                        d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z d dl	mZ d dlmZ d dlmZ d dlmZmZmZ dd	lmZmZ  ej.                  e      Z G d
 dej4                        Zy)    )annotationsN)	load_file)	save_file)nn)tqdm)fullnamehttp_getimport_from_string   )WhitespaceTokenizerWordTokenizerc                      e Zd Z	 	 d	 	 	 	 	 ddZd ZddZddZdddZd Ze	dd       Z
e	dd	 e       d
f	 	 	 	 	 	 	 dd       Zy
)WordEmbeddingsFc                   t         j                  j                  |        t        |t              rt        j                  |      }t        |t
        j                        rt        j                  |      }|j                         \  }}|| _        t        j                  ||      | _        | j                  j                  d|i       || j                  j                  _        || _        || _        || _        y )Nweight)r   Module__init__
isinstancelistnpasarrayndarraytorch
from_numpysizeembeddings_dimension	Embedding	emb_layerload_state_dictr   requires_grad	tokenizerupdate_embeddingsmax_seq_length)selfr!   embedding_weightsr"   r#   num_embeddingsr   s          ^/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/models/WordEmbeddings.pyr   zWordEmbeddings.__init__   s     			4 '. "

+< ='4 % 0 01B C/@/E/E/G,,$8!n6JK&&2C'DE.?+"!2,    c                b    | j                  |d         }d }|j                  |||d   d       |S )N	input_idsattention_mask)token_embeddingscls_token_embeddingsr+   )r   update)r$   featuresr,   
cls_tokenss       r'   forwardzWordEmbeddings.forward.   sB    >>(;*?@
$4(2"*+;"<	
 r(   c                6   |D cg c]  } | j                   j                  |fi |! }}|D cg c]  }t        |       }}t        |      }g }g }	|D ]I  }dg|t        |      z
  z  }
|j	                  ||
z          |	j	                  dgt        |      z  |
z          K t        j                  |t
        j                        t        j                  |	t
        j                        t        j                  |t
        j                        d}|S c c}w c c}w )Nr   r   )dtype)r*   r+   sentence_lengths)r!   tokenizelenmaxappendr   tensorlong)r$   textskwargstexttokenized_textstokensr4   max_lenr*   attention_maskspaddingoutputs               r'   r5   zWordEmbeddings.tokenize:   s    OTUt24>>224B6BUU6EFFCKFF&'	% 	@FcWs6{23GVg-.""A3V#4w#>?	@ iuzzB#ll?%**M %-=UZZ P
 # VFs
   $DDc                    | j                   S )N)r   r$   s    r'   get_word_embedding_dimensionz+WordEmbeddings.get_word_embedding_dimensionN   s    (((r(   c                   t        t        j                  j                  |d      d      5 }t	        j
                  | j                         |d       d d d        |r9t        | j                         t        j                  j                  |d             nBt        j                  | j                         t        j                  j                  |d             | j                  j                  |       y # 1 sw Y   xY w)Nwordembedding_config.jsonw   )indentmodel.safetensorspytorch_model.bin)openospathjoinjsondumpget_config_dictsave_safetensors_file
state_dictr   saver!   )r$   output_pathsafe_serializationfOuts       r'   rW   zWordEmbeddings.saveQ   s    "'',,{,GH#N 	>RVIId**,d1=	> !$//"3RWW\\+Ob5cdJJt("'',,{DW*XYK(	> 	>s   'C33C<c                \    t        | j                        | j                  | j                  dS )N)tokenizer_classr"   r#   )r   r!   r"   r#   rE   s    r'   rT   zWordEmbeddings.get_config_dict[   s*    '7!%!7!7"11
 	
r(   c                v   t        t        j                  j                  | d            5 }t	        j
                  |      }d d d        t        d         }|j                  |       }t        j                  j                  t        j                  j                  | d            r*t        t        j                  j                  | d            }nIt        j
                  t        j                  j                  | d      t        j                  d      d      }|d   }t        |||d	   
      }|S # 1 sw Y   xY w)NrH   r\   rL   rM   cpuT)map_locationweights_onlyzemb_layer.weightr"   r!   r%   r"   )rN   rO   rP   rQ   rR   loadr
   existsload_safetensors_filer   devicer   )
input_pathfInconfigr\   r!   weightsr%   models           r'   rb   zWordEmbeddings.loadb   s    "'',,z+FGH 	$CYYs^F	$ -V4E-FG#((4	77>>"'',,z3FGH+BGGLLEX,YZGjjZ)<=ELLY^L_nrG $$673DX^_rXs
 	$ 	$s   D//D8 Nc           
     H   t         j                  d|         t        j                  j	                  |       s?t         j                  |  d       d| v sd| v rt        d|        d| z   }t        ||        d }g }g }| j                  d      rt        j                  | dd	
      nt        | d	
      5 }	t        |	dd      }
|
D ]  }|j                         j                  |      }|st        |      dk(  r4|d   }|Ct        |      dz
  }|j                  d       |j                  t        j                   |             t        |      dz
  |k7  rt         j#                  d       t        j$                  |dd  D cg c]  }t'        |       c}      }|j                  |       |j                  |       ||dkD  st        |      |kD  s n t        j(                  |      }|j+                  |       t-        |||      cd d d        S c c}w # 1 sw Y   y xY w)NzRead in embeddings file z, does not exist, try to download from server/\zEmbeddings file not found: zAhttps://public.ukp.informatik.tu-darmstadt.de/reimers/embeddings/z.gzrtutf8)encodingzLoad Word Embeddings
Embeddings)descunitrJ   r   r   PADDING_TOKENz\ERROR: A line in the embeddings file had more or less  dimensions than expected. Skip token.ra   )loggerinforO   rP   rc   
ValueErrorr	   endswithgziprN   r   rstripsplitr6   r8   r   zeroserrorarrayfloatr   	set_vocabr   )embeddings_file_pathr"   item_separatorr!   max_vocab_sizeurlr   vocab
embeddingsrg   iteratorliner|   wordnumvectors                   r'   from_text_filezWordEmbeddings.from_text_fileu   s    	./C.DEFww~~23KK/00\]^**d6J.J #>?S>T!UVVUXllCS./#
 $,,U3 II*D6B*V<'	 ADC&<<PH  ++N;UqQx'/+.u:>(LL1%%bhh/C&DE JN)* LLv qr"C#5:"CD!!&)T"!-.12DUVdId58 J/J&!#zUfK'	 '	6 #D7'	 '	s1   3CH?H+H=HH6HHH!)Fi@B )r!   r   r"   boolr#   int)r;   z	list[str])returnr   )T)rX   strrY   r   )rf   r   )r   r   r"   r   r   r   r   r   )__name__
__module____qualname__r   r1   r5   rF   rW   rT   staticmethodrb   r   r    r(   r'   r   r      s    
 #(%- -  	-
 -.
())
  $  #(!%'"=!== =
 = =r(   r   )
__future__r   rz   rR   loggingrO   numpyr   r   safetensors.torchr   rd   r   rU   r   r   sentence_transformers.utilr   r	   r
   r!   r   r   	getLoggerr   rv   r   r   r   r(   r'   <module>r      sS    "    	   @ @   M M 9			8	$]RYY ]r(   