
    sgA                        d dl Z d dlmZmZmZ d dlZd dlmZ ddlm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZ  e       rd dlZ ee      Zd	d
dddddde	e
ded   ided   idZ eed   j1                               Z G d de      Z G d d      Z G d de      Z G d de      Z G d de      Z G d d e      Z G d! d"e      Z  G d# d$e      Z!eeeeee e!d%Z"d& Z#d(d'Z$y))    N)Dict
NamedTupleOptional)tqdm   )GGUF_CONFIG_MAPPINGGGUF_TENSOR_MAPPINGGGUF_TOKENIZER_MAPPING_gguf_parse_value)is_torch_available)is_gguf_available)
get_loggerversiontensor_countkv_count)r   r   r   	file_typequantization_version)r   r   )GGUFgeneral	tokenizertokenizer_config)ignoreconfigtensorsr   r   r   c                   @    e Zd ZU ej                  ed<   eed<   eed<   y)
GGUFTensorweightsnamemetadataN)__name__
__module____qualname__npndarray__annotations__strdict     [/var/www/html/venv/lib/python3.12/site-packages/transformers/modeling_gguf_pytorch_utils.pyr   r   :   s    ZZ
INr)   r   c                       e Zd ZddZd Zy)TensorProcessorNc                     |xs i | _         y Nr   )selfr   s     r*   __init__zTensorProcessor.__init__A   s    lr)   c                     t        ||i       S r.   )r   r0   r   r   kwargss       r*   processzTensorProcessor.processD   s    '4,,r)   r.   )r    r!   r"   r1   r5   r(   r)   r*   r,   r,   @   s    #-r)   r,   c            	       p     e Zd Zd fd	Zd Z	 ddej                  dedee   dej                  fdZ	 xZ
S )	LlamaTensorProcessorc                 &    t         |   |       y Nr/   superr1   r0   r   	__class__s     r*   r1   zLlamaTensorProcessor.__init__I       'r)   c                    d|v sd|v rx| j                   j                  d      }| j                   j                  d      }d ||fv rt        ||i       S d|v r| j                  |||      }nd|v r| j                  |||      }t        ||i       S )Nz.attn_k.z.attn_q.num_attention_headsnum_key_value_heads)r   getr   _reverse_permute_weights)r0   r   r   r4   	num_headsnum_kv_headss         r*   r5   zLlamaTensorProcessor.processL   s    t!3(=>I;;??+@AL	<00!'444T!77IVt#77LY'4,,r)   r   n_headrE   returnc                     |||k7  r|}|j                   d   |z  dz  } |j                  ||dg|j                   dd   }|j                  dd      j                  |j                         S )Nr      r   )shapereshapeswapaxes)r0   r   rF   rE   dimws         r*   rC   z-LlamaTensorProcessor._reverse_permute_weightsY   sr    
 #,(>!FmmA&(A-GOOFC?W]]12->?zz!Q''66r)   r.   )r    r!   r"   r1   r5   r#   r$   intr   rC   __classcell__r=   s   @r*   r7   r7   H   sE    (- OS
7zz
7+.
7>Fsm
7	
7r)   r7   c                   \     e Zd Zd fd	Zd Zdej                  deeef   dede	fdZ
 xZS )	Qwen2MoeTensorProcessorc                 &    t         |   |       y r9   r:   r<   s     r*   r1   z Qwen2MoeTensorProcessor.__init__g   r>   r)   c                     d|v rE|j                  d      }|j                  d      }|r!| j                  ||||       t        |d i       S d|v rt        j                  |d      }t        ||i       S )N_exptensor_key_mappingparsed_parametersffn_gate_inp_shexpr   axis)rB   _split_moe_expert_tensorr   r#   expand_dims)r0   r   r   r4   rW   rX   s         r*   r5   zQwen2MoeTensorProcessor.processj   sz    T>!',@!A &

+> ?!--g7H$Pbc!'4444' nnW15G'4,,r)   r   rX   r   rW   c           	         d}d|v rd}nd|v rd}nd|v rd}nt        d| d	      |D ]  }||v s|j                  |||         } | j                  j                  d
d      }t	        d|      D ]N  }|j                  dd| d| d      }	||   }
t        j                  t        j                  |
            |d   |	<   P y )N ffn_gate_exps	gate_projffn_down_exps	down_projffn_up_expsup_projzCannot map expert tensor z in Qwen2Moe architecture.num_experts<   r   z.weight.r   )	
ValueErrorreplacer   rB   rangetorch
from_numpyr#   copy)r0   r   rX   r   rW   exp_nametensor_name	w_counteri	temp_name
exp_weights              r*   r\   z0Qwen2MoeTensorProcessor._split_moe_expert_tensorw   s    
 d""H$"Hd" H8>XYZZ- 	RKd"||K1CK1PQ	R KKOOM26	q)$ 	\AY!A3az0IJI J6;6F6FrwwzGZ6[i(3	\r)   r.   )r    r!   r"   r1   r5   r#   r$   r   r&   r'   r\   rP   rQ   s   @r*   rS   rS   f   sC    (-\zz\6:39o\MP\fj\r)   rS   c                   v     e Zd Zd fd	Zd Zdej                  dedefdZdej                  dedefdZ	 xZ
S )	BloomTensorProcessorc                 &    t         |   |       y r9   r:   r<   s     r*   r1   zBloomTensorProcessor.__init__   r>   r)   c                     d|v rI| j                   d   }| j                   d   }d|v r| j                  |||      }n| j                  |||      }t        ||i       S )Nattn_qkvrF   hidden_sizeweight)r   _reverse_reshape_weights_reverse_reshape_biasr   )r0   r   r   r4   rD   n_embeds         r*   r5   zBloomTensorProcessor.process   se    H-Ikk-0G477GT44WiQ'4,,r)   r   rF   r~   c                 (   t        j                  |dd      \  }}}|j                  |||z  |      }|j                  |||z  |      }|j                  |||z  |      }t        j                  |||gd      }|j                  |dz  ||z  z  |      S )N   r   rZ   r   )r#   array_splitrK   stack)r0   r   rF   r~   qkvqkv_weightss           r*   r|   z-BloomTensorProcessor._reverse_reshape_weights   s     ..!!41aIIfg/9IIfg/9IIfg/9hh1ayq1""6A:F1B#CWMMr)   c                    t        j                  |d      \  }}}|j                  |||z        }|j                  |||z        }|j                  |||z        }t        j                  |||gd      j	                         }|S )Nr   r   rZ   )r#   r   rK   r   flatten)r0   r   rF   r~   q_biask_biasv_biasqkv_biass           r*   r}   z*BloomTensorProcessor._reverse_reshape_bias   s     "$!;6(9:6(9:6(9:88VVV41=EEGr)   r.   )r    r!   r"   r1   r5   r#   r$   rO   r|   r}   rP   rQ   s   @r*   rv   rv      sN    (-
N

 
NC 
NRU 
N
RZZ 
 
s 
r)   rv   c                   &     e Zd Zd fd	Zd Z xZS )T5TensorProcessorc                 &    t         |   |       y r9   r:   r<   s     r*   r1   zT5TensorProcessor.__init__   r>   r)   c                     d }|j                  d      D ]  }|j                         st        |      } n t        ||d|i      S )Nrh   bid)splitisdigitrO   r   )r0   r   r   r4   r   chunks         r*   r5   zT5TensorProcessor.process   sH    ZZ_ 	E}}%j	 '4%66r)   r.   r    r!   r"   r1   r5   rP   rQ   s   @r*   r   r      s    (7r)   r   c                   &     e Zd Zd fd	Zd Z xZS )GPT2TensorProcessorc                 &    t         |   |       y r9   r:   r<   s     r*   r1   zGPT2TensorProcessor.__init__   r>   r)   c                     d|v sd|v sd|v sd|v r|j                   }|dk(  rDd}|j                  di       }t        j                  t	        j
                  |            |d   |<   d }t        ||i       S )	Nzattn_qkv.weightzffn_down.weightzffn_up.weightzattn_output.weightoutput.weightzlm_head.weightrX   r   )TrB   rl   rm   r#   rn   r   )r0   r   r   r4   rX   s        r*   r5   zGPT2TensorProcessor.process   s     % D($&#t+iiG ?" $D &

+> C161A1A"'''BR1Si(.D'4,,r)   r.   r   rQ   s   @r*   r   r      s    (-r)   r   c                   &     e Zd Zd fd	Zd Z xZS )MambaTensorProcessorc                 &    t         |   |       y r9   r:   r<   s     r*   r1   zMambaTensorProcessor.__init__   r>   r)   c                     d|v rd|vrd|vr|j                  dd      }d|v rt        j                  |d      }d|v rt        j                  |       }t	        ||i       S )	Nssm_dbiasr{   zmixer.Dzssm_conv1d.weightr   rZ   ssm_a)rj   r#   r]   logr   r3   s       r*   r5   zMambaTensorProcessor.process   sk    d?vT1hd6J <<3D$& nnW15Gd? ffgX&G'4,,r)   r.   r   rQ   s   @r*   r   r      s    (-r)   r   )llamaqwen2moebloomt5	t5encodergpt2mambac                     | j                   |   }|j                  D cg c]%  }t        |j                  |   |j                        ' c}S c c}w r.   )fieldsdatar   partstypes)readerfieldvalue_data_indexs       r*   
read_fieldr      s?    MM% EX]XbXbcekk+6Dcccs   *Ac                 	  &' t               rt               r	ddlm}m} n t
        j                  d       t        d       ||       }|j                  }t        |j                               }t        D ci c]  }|i  }}t        |d      d   }	t        |d      }
d|	v rd|
v rd}nd	|	v sd
|	v rd|d   d<   d	}n|	}d|	v rd}d|	v rSh d&d't        &fd|j                  D              }t        'fd|j                  D              }||d   d<   | |d   d<   d}d|	v rh| j                  d      d   j!                         }t#        j$                  d|      }|t'        d|	 d      |j)                         j+                  d      }|	|z   t,        vrt'        d |	|z    d!      t/        d" |j                  D              |d   d#<   |j                  j1                         D ]%  \  }}|j3                  |	|      }|j                  d$      }|d   }d$j5                  |d%d       }|j6                  D cg c]%  }t9        |j:                  |   |j<                        ' }}t?        |      d%k(  r|d   }tA        |tB              r|	|v r|j3                  |	|      }t        D ]F  }t        |   }||v s|||   v s||   |   }|dk(  r'||||   |<   ||v s6|jE                  |       H ||v st
        jG                  d&| d'|        ( d(|d   vr3|d)   }d*|v rt?        |d*         |d   d(<   nt
        jI                  d+       |rSt        d,   |	|z      }|jK                  di       }tL        jK                  |	tN              } ||-      }tQ        |j                  d./      D ]  } | jR                  }! || j6                  | jT                        }"|jW                  |"|!||0      }#|#jX                  }"|#jR                  }!|#jZ                  jK                  d1      }$|!w|D ]L  }%|%j]                  |$2      |!v s|!j3                  |%j]                  |$2      ||%   j]                  |$2            }!N t_        j`                  tc        jd                  |"            |d,   |!<    t?        |      dkD  rt
        jG                  d3|        |S c c}w c c}w )4a  
    Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed
    tokenizer and config attributes.

    Args:
        gguf_checkpoint_path (`str`):
            The path the to GGUF file to load
        return_tensors (`bool`, defaults to `True`):
            Whether to read the tensors from the file and return them. Not doing so is faster
            and only loads the metadata in memory.
    r   )
GGUFReader
dequantizezLoading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions.zKPlease install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.zgeneral.architecturezgeneral.namer   mistralr   r   Tr   is_gated_actr   	qwen2_moestablelm>   attn_k.biasattn_q.biasattn_v.biasffn_normc              3   H   K   | ]  }D ]  }||j                   v    y wr.   r   ).0tensor	bias_nameattn_bias_names      r*   	<genexpr>z'load_gguf_checkpoint.<locals>.<genexpr>4  s)     mF^lmQZyFKK/m/ms   "c              3   :   K   | ]  }|j                   v   y wr.   r   )r   r   ffn_norm_names     r*   r   z'load_gguf_checkpoint.<locals>.<genexpr>5  s     #^VMV[[$@#^s   r   use_parallel_residualr_   falcon/z-\d+b-Nz>From file name, cannot determine the number of parameters for z architecture-zArchitecture z not supportedc              3   :   K   | ]  }d |j                   k7    yw)r   Nr   )r   r   s     r*   r   z'load_gguf_checkpoint.<locals>.<genexpr>J  s      =+16;;&=s   tie_word_embeddingsrh   r   z1Some keys were not parsed and added into account z | 
vocab_sizer   tokenszCan't find a way to retrieve missing config vocab_size from tokenizer parameters. This will use default value from model config class and cause unexpected behavior.r   r/   z,Converting and de-quantizing GGUF tensors...)desc)r   r   rW   rX   r   )r   z0Some keys of the GGUF file were not considered: )3r   r   ggufr   r   loggererrorImportErrorr   listkeysGGUF_TO_TRANSFORMERS_MAPPINGr   anyr   r   lowerresearchri   groupstripGGUF_SUPPORTED_ARCHITECTURESallitemsrj   joinr   r   r   r   len
isinstancer&   removeinfowarningrB   TENSOR_PROCESSORSr,   r   r   tensor_typer5   r   r   formatrl   rm   r#   rn   )(gguf_checkpoint_pathreturn_tensorsr   r   r   r   reader_keysr   rX   architecture
model_nameupdated_architecturer   r   
model_sizegguf_file_namemgguf_keyr   r   prefix
config_keyr   r   	parameterparameter_renamesrenamed_config_keytokenizer_parametersrW   r   ProcessorClass	processorr   r   r   resultr   rp   r   r   s(                                         @@r*   load_gguf_checkpointr     sO    13//A	
 ghh,-F]]Fv{{}%K(DE1BEEf&<=a@LFN3J ,9
#:( 
	!<6:(#N3#+\!*
 \!F"mfnnmm ##^v~~#^ ^2:(#J/CX?X(#$;<J <-33C8<BBDIIi09PQ]P^^kl  WWY__S)
j (DD=
)B(C>RSS := =5;^^= :h 56
 "==..0 b%##L2FGs#qXXeABi(
]b]g]ghk"5;;{#;U[[Ihhu:?!HEeS!le&;MM,0DEE5 	1I <Y G**z=Nv=V/V%6v%>z%J"%+%1GL%i01CD{*&&x0	1 {"KKKH:UXY^X_`a9b@ ,X660=++8;<PQY<Z8[h'5NNe
 9)D\T^E^_"&&x4*..|_M"&1	6>>0^_ 	TF;;D f.@.@AG&&#5"3	 ' F nnG;;D//%%e,C|1 v%%#%.$6<<(:(:s(:(CEWXcEdEkEkpsEkEtuDv
 271A1A"'''BR1Si(./	T2 ;!F{mTUE Fv is   5
S1*S)F)%r   typingr   r   r   numpyr#   r   integrationsr   r	   r
   r   utilsr   utils.import_utilsr   utils.loggingr   rl   r    r   r   r   r   r   r   r,   r7   rS   rv   r   r   r   r   r   r   r(   r)   r*   <module>r     s6  " 
 - -    & 1 % 	H	 !*"

 "-F\] ""5kBC$&<=O&PQ    $$@$K$P$P$RS  - -7? 7<&\o &\R$? $N
7 
7-/ -4-? -* "'!
"! d
[r)   