"""
Integration with Deepspeed
"""

import copy
import importlib.metadata as importlib_metadata
import importlib.util
import weakref
from functools import partialmethod

from ..dependency_versions_check import dep_version_check
from ..utils import is_accelerate_available, is_torch_available, is_torch_mlu_available, logging


if is_torch_available():
    import torch


logger = logging.get_logger(__name__)


def is_deepspeed_available():
    package_exists = importlib.util.find_spec("deepspeed") is not None

    # Check that what `find_spec` located is the actual installed library (or its MLU fork) and not just a stray
    # "deepspeed" directory, by looking up the distribution metadata.
    if package_exists:
        try:
            if is_torch_mlu_available():
                _ = importlib_metadata.metadata("deepspeed-mlu")
                return True
            _ = importlib_metadata.metadata("deepspeed")
            return True
        except importlib_metadata.PackageNotFoundError:
            return False
    return False


if is_accelerate_available() and is_deepspeed_available():
    from accelerate.utils.deepspeed import HfDeepSpeedConfig as DeepSpeedConfig
else:
    # Inherit from a plain `object` so that this module can still be imported when accelerate/deepspeed are not
    # installed; the DeepSpeed glue code below never runs in that case.
    from builtins import object as DeepSpeedConfig


class HfDeepSpeedConfig(DeepSpeedConfig):
    """
    This object contains a DeepSpeed configuration dictionary and can be quickly queried for things like zero stage.

    A `weakref` of this object is stored in the module's globals to be able to access the config from areas where
    things like the Trainer object is not available (e.g. `from_pretrained` and `_get_resized_embeddings`). Therefore
    it's important that this object remains alive while the program is still running.

    [`Trainer`] uses the `HfTrainerDeepSpeedConfig` subclass instead. That subclass has logic to sync the configuration
    with values of [`TrainingArguments`] by replacing special placeholder values: `"auto"`. Without this special logic
    the DeepSpeed configuration is not modified in any way.

    Args:
        config_file_or_dict (`Union[str, Dict]`): path to DeepSpeed config file or dict.

    """

    def __init__(self, config_file_or_dict):
        # set global weakref object
        set_hf_deepspeed_config(self)
        dep_version_check("accelerate")
        dep_version_check("deepspeed")
        super().__init__(config_file_or_dict)


class HfTrainerDeepSpeedConfig(HfDeepSpeedConfig):
    """
    The `HfTrainerDeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
    same lifespan as the latter.
    """

    def __init__(self, config_file_or_dict):
        super().__init__(config_file_or_dict)
        self._dtype = None
        self.mismatches = []

    def dtype(self):
        if self._dtype is None:
            raise ValueError("trainer_config_process() wasn't called yet to tell dtype")
        return self._dtype

    def is_auto(self, ds_key_long):
        val = self.get_value(ds_key_long)
        if val is None:
            return False
        else:
            return val == "auto"

    def fill_match(self, ds_key_long, hf_val, hf_key=None, must_match=True):
        """
        A utility method that massages the config file and can optionally verify that the values match.

        1. Replace "auto" values with `TrainingArguments` value.

        2. If it wasn't "auto" and `must_match` is true, then check that DS config matches Trainer
        config values and if mismatched add the entry to `self.mismatched` - will assert during
        `trainer_config_finalize` for one or more mismatches.

        """
        config, ds_key = self.find_config_node(ds_key_long)
        if config is None:
            return

        if config.get(ds_key) == "auto":
            config[ds_key] = hf_val
            return

        if not must_match:
            return

        ds_val = config.get(ds_key)
        if ds_val is not None and ds_val != hf_val:
            self.mismatches.append(f"- ds {ds_key_long}={ds_val} vs hf {hf_key}={hf_val}")

    fill_only = partialmethod(fill_match, must_match=False)

    def trainer_config_process(self, args, auto_find_batch_size=False):
        """
        Adjust the config with `TrainingArguments` values. This stage is run during `TrainingArguments` object
        creation.
        """
        # DeepSpeed does: train_batch_size = world_size * train_micro_batch_size_per_gpu * gradient_accumulation_steps
        train_batch_size = args.world_size * args.per_device_train_batch_size * args.gradient_accumulation_steps
        self.fill_match(
            "train_micro_batch_size_per_gpu",
            args.per_device_train_batch_size,
            "per_device_train_batch_size",
            not auto_find_batch_size,
        )
        self.fill_match(
            "gradient_accumulation_steps", args.gradient_accumulation_steps, "gradient_accumulation_steps"
        )
        self.fill_match(
            "train_batch_size", train_batch_size, "train_batch_size (calculated)", not auto_find_batch_size
        )
        self.fill_match("gradient_clipping", args.max_grad_norm, "max_grad_norm")

        self.fill_match("optimizer.params.lr", args.learning_rate, "learning_rate")
        self.fill_match("optimizer.params.betas", [args.adam_beta1, args.adam_beta2], "adam_beta1+adam_beta2")
        self.fill_match("optimizer.params.eps", args.adam_epsilon, "adam_epsilon")
        self.fill_match("optimizer.params.weight_decay", args.weight_decay, "weight_decay")

        self.fill_only("scheduler.params.warmup_min_lr", 0)  # not a trainer arg
        self.fill_match("scheduler.params.warmup_max_lr", args.learning_rate, "learning_rate")
        # total_num_steps and warmup_num_steps are set in trainer_config_finalize

        # fp16
        if args.fp16 or args.fp16_full_eval:
            fp16_backend = "apex" if args.fp16_backend == "apex" else "amp"
        else:
            fp16_backend = None

        if args.save_on_each_node:
            # deepspeed uses shared storage by default. Override this setting if save_on_each_node == True
            self.config["checkpoint"] = self.config.get("checkpoint", {})
            self.config["checkpoint"]["use_node_local_storage"] = args.save_on_each_node

        # amp: similar to the pytorch native amp - it has a bunch of optional params but we won't set
        # any here unless the user did the work
        self.fill_match(
            "fp16.enabled",
            ((args.fp16 or args.fp16_full_eval) and fp16_backend == "amp"),
            "fp16|fp16_full_eval+fp16_backend(amp)",
        )

        # apex: delegates amp work to apex (which needs to be available), but it cannot be used with any
        # ZeRO features
        self.fill_match("amp.enabled", fp16_backend == "apex", "fp16+fp16_backend(apex)")
        self.fill_match("amp.opt_level", args.fp16_opt_level, "fp16_opt_level")

        self.fill_match("bf16.enabled", (args.bf16 or args.bf16_full_eval), "bf16|bf16_full_eval")

        # deepspeed's default mode is fp16 unless there is a config that says differently
        if self.is_true("bf16.enabled"):
            self._dtype = torch.bfloat16
        elif self.is_false("fp16.enabled"):
            self._dtype = torch.float32
        else:
            self._dtype = torch.float16

    def trainer_config_finalize(self, args, model, num_training_steps):
        """
        This stage is run after we have the model and know num_training_steps.

        Now we can complete the configuration process.
        """
        # deal with config keys that use `auto` value and rely on model's hidden_size
        hidden_size_based_keys = [
            "zero_optimization.reduce_bucket_size",
            "zero_optimization.stage3_prefetch_bucket_size",
            "zero_optimization.stage3_param_persistence_threshold",
        ]
        hidden_size_auto_keys = [x for x in hidden_size_based_keys if self.is_auto(x)]

        if len(hidden_size_auto_keys) > 0:
            if hasattr(model.config, "hidden_size"):
                hidden_size = model.config.hidden_size
            elif hasattr(model.config, "hidden_sizes"):
                # if there are many hidden sizes pick the largest one
                hidden_size = max(model.config.hidden_sizes)
            elif hasattr(model.config, "text_config") and hasattr(model.config.text_config, "hidden_size"):
                hidden_size = model.config.text_config.hidden_size
            elif hasattr(model.config, "text_config") and hasattr(model.config.text_config, "hidden_sizes"):
                hidden_size = max(model.config.text_config.hidden_sizes)
            else:
                raise ValueError(
                    "The model's config file has neither `hidden_size` nor `hidden_sizes` entry, therefore it's not"
                    " possible to automatically fill out the following `auto` entries in the DeepSpeed config file:"
                    f" {hidden_size_auto_keys}. You can fix that by replacing `auto` values for these keys with an"
                    " integer value of your choice."
                )

            self.fill_only("zero_optimization.reduce_bucket_size", hidden_size * hidden_size)
            if self.is_zero3():
                # automatically assign the optimal config values based on model config
                self.fill_only(
                    "zero_optimization.stage3_prefetch_bucket_size", int(0.9 * hidden_size * hidden_size)
                )
                self.fill_only("zero_optimization.stage3_param_persistence_threshold", 10 * hidden_size)

        # scheduler
        self.fill_match("scheduler.params.total_num_steps", num_training_steps, "num_training_steps (calculated)")
        self.fill_match("scheduler.params.warmup_num_steps", args.get_warmup_steps(num_training_steps), "warmup_steps")

        if len(self.mismatches) > 0:
            mismatches = "\n".join(self.mismatches)
            raise ValueError(
                "Please correct the following DeepSpeed config values that mismatch TrainingArguments"
                f" values:\n{mismatches}\nThe easiest method is to set these DeepSpeed config values to 'auto'."
            )
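

# Illustrative notes on the "auto" resolution performed above (the concrete values are hypothetical, not library
# defaults):
# - `trainer_config_process` fills placeholders from `TrainingArguments`, e.g. with `learning_rate=5e-5` the entry
#   `{"optimizer": {"params": {"lr": "auto"}}}` becomes `{"optimizer": {"params": {"lr": 5e-5}}}`, and
#   `train_batch_size` is derived as world_size * per_device_train_batch_size * gradient_accumulation_steps.
# - `trainer_config_finalize` fills the hidden_size-derived entries, e.g. for a model with hidden_size=1024:
#   reduce_bucket_size = 1024 * 1024 = 1,048,576 and, under ZeRO-3, stage3_prefetch_bucket_size =
#   int(0.9 * 1024 * 1024) = 943,718 and stage3_param_persistence_threshold = 10 * 1024 = 10,240.
# - A non-"auto" value that disagrees with the corresponding `TrainingArguments` value is collected in
#   `self.mismatches` and reported as a single ValueError at the end of `trainer_config_finalize`.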

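
# A minimal usage sketch (not part of this module's API; the model name and config values below are hypothetical and
# only for illustration, and it is meant to run under a deepspeed/distributed launch): when using ZeRO-3 outside of
# `Trainer`, the config object must be created *before* `from_pretrained` and kept alive for the lifetime of the
# program, so that model loading can see the module-level weakref (queried via `is_deepspeed_zero3_enabled` below)
# and place the weights directly into their ZeRO-3 partitions while loading.
def _example_keep_zero3_config_alive():
    from transformers import AutoModel

    ds_config = {
        "zero_optimization": {"stage": 3},
        "train_micro_batch_size_per_gpu": 1,
        "train_batch_size": 1,
    }
    dschf = HfDeepSpeedConfig(ds_config)  # must stay referenced for as long as the model is used
    assert is_deepspeed_zero3_enabled()  # the weakref set in `HfDeepSpeedConfig.__init__` is now visible module-wide
    model = AutoModel.from_pretrained("gpt2")  # weights are sharded across ranks while they are being loaded
    return dschf, model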


_hf_deepspeed_config_weak_ref = None


def set_hf_deepspeed_config(hf_deepspeed_config_obj):
    # this is a special weakref global object to allow us to get to Deepspeed config from APIs
    # that don't have an easy way to get to the Deepspeed config outside of the Trainer domain.
    global _hf_deepspeed_config_weak_ref
    # will go away automatically when HfDeepSpeedConfig is destroyed (when TrainingArguments is destroyed)
    _hf_deepspeed_config_weak_ref = weakref.ref(hf_deepspeed_config_obj)


def unset_hf_deepspeed_config():
    # useful for unit tests to ensure the global state doesn't leak - call from `tearDown` method
    global _hf_deepspeed_config_weak_ref
    _hf_deepspeed_config_weak_ref = None


def is_deepspeed_zero3_enabled():
    if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None:
        return _hf_deepspeed_config_weak_ref().is_zero3()
    else:
        return False


def deepspeed_config():
    if _hf_deepspeed_config_weak_ref is not None and _hf_deepspeed_config_weak_ref() is not None:
        return _hf_deepspeed_config_weak_ref().config
    else:
        return None


def deepspeed_optim_sched(trainer, hf_deepspeed_config, args, num_training_steps, model_parameters):
    """
    A convenience wrapper that deals with optimizer and lr scheduler configuration.
    """
    from accelerate.utils import DummyOptim, DummyScheduler

    config = hf_deepspeed_config.config

    # Optimizer: either the DeepSpeed-configured one (wrapped as a DummyOptim placeholder) or a Trainer-created one.
    optimizer = None
    if "optimizer" in config:
        if args.adafactor:
            raise ValueError(
                "--adafactor was passed, but also found `optimizer` configured in the DeepSpeed config. "
                "Only one optimizer can be configured."
            )
        optimizer = DummyOptim(params=model_parameters)
    else:
        if hf_deepspeed_config.is_offload():
            logger.info(
                "Detected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the"
                " custom optimizer has both CPU and GPU implementation (except LAMB)"
            )

        # ds supports Adam, OneBitAdam, and Lamb optimizers and can import other optimizers from torch.
        # But trainer uses AdamW by default.
        optimizer = trainer.create_optimizer()
        # To use other optimizers requires voiding warranty with: `zero_allow_untested_optimizer`
        config["zero_allow_untested_optimizer"] = True

    lr_scheduler = None
    if "scheduler" in config:
        lr_scheduler = DummyScheduler(optimizer)
    else:
        if isinstance(optimizer, DummyOptim):

            def _lr_scheduler_callable(optimizer):
                # create a shallow copy first, so later modifications do not affect the original trainer
                trainer_copy = copy.copy(trainer)
                # at the time _lr_scheduler_callable is called, trainer.lr_scheduler has been set
                # update it to None so that we can re-create a new scheduler
                trainer_copy.lr_scheduler = None
                lr_scheduler = trainer_copy.create_scheduler(
                    num_training_steps=num_training_steps, optimizer=optimizer
                )
                return lr_scheduler

            lr_scheduler = DummyScheduler(optimizer, lr_scheduler_callable=_lr_scheduler_callable)
        else:
            lr_scheduler = trainer.create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer)

    return optimizer, lr_scheduler


def deepspeed_init(trainer, num_training_steps, inference=False):
    """
    Init DeepSpeed, after updating the DeepSpeed configuration with any relevant Trainer's args.

    If `resume_from_checkpoint` was passed then an attempt to resume from a previously saved checkpoint will be made.

    Args:
        trainer: Trainer object
        num_training_steps: per single gpu
        resume_from_checkpoint: path to a checkpoint if to resume from after normal DeepSpeedEngine load
        inference: launch in inference mode (no optimizer and no lr scheduler)
        auto_find_batch_size: whether to ignore the `train_micro_batch_size_per_gpu` argument as it's being
            set automatically by the auto batch size finder

    Returns: optimizer, lr_scheduler

    We may use `deepspeed_init` more than once during the life of Trainer, when we do - it's a temp hack based on:
    https://github.com/microsoft/DeepSpeed/issues/1394#issuecomment-937405374 until Deepspeed fixes a bug where it
    can't resume from a checkpoint after it did some stepping https://github.com/microsoft/DeepSpeed/issues/1612

    """
    from deepspeed.utils import logger as ds_logger

    model = trainer.model
    args = trainer.args

    hf_deepspeed_config = trainer.accelerator.state.deepspeed_plugin.hf_ds_config

    # resume config update - some bits like `model` and `num_training_steps` only become available during train
    hf_deepspeed_config.trainer_config_finalize(args, model, num_training_steps)

    # set the Deepspeed log level consistent with the Trainer
    ds_logger.setLevel(args.get_process_log_level())

    if inference:
        # only Z3 makes sense for the inference
        if not hf_deepspeed_config.is_zero3():
            raise ValueError("ZeRO inference only makes sense with ZeRO Stage 3 - please adjust your config")

        # in case the training config is re-used for inference
        hf_deepspeed_config.del_config_sub_tree("optimizer")
        hf_deepspeed_config.del_config_sub_tree("lr_scheduler")
        optimizer, lr_scheduler = None, None
        model_parameters = None
    else:
        trainer.optimizer = None  # important for when deepspeed_init is used as re-init
        model_parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
        optimizer, lr_scheduler = deepspeed_optim_sched(
            trainer, hf_deepspeed_config, args, num_training_steps, model_parameters
        )

    return optimizer, lr_scheduler


def deepspeed_load_checkpoint(deepspeed_engine, checkpoint_path, load_module_strict=True):
    # it's possible that the user is trying to resume from a path which doesn't necessarily contain a deepspeed
    # checkpoint, so check here whether the path contains what looks like one
    import glob

    deepspeed_checkpoint_dirs = sorted(glob.glob(f"{checkpoint_path}/global_step*"))

    if len(deepspeed_checkpoint_dirs) > 0:
        logger.info(f"Attempting to resume from {checkpoint_path}")
        # this magically updates self.optimizer and self.lr_scheduler
        load_path, _ = deepspeed_engine.load_checkpoint(
            checkpoint_path,
            load_module_strict=load_module_strict,
            load_optimizer_states=True,
            load_lr_scheduler_states=True,
        )
        if load_path is None:
            raise ValueError(f"[deepspeed] failed to resume from checkpoint {checkpoint_path}")
    else:
        raise ValueError(f"Can't find a valid checkpoint at {checkpoint_path}")
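

# Resume sketch (illustrative only; the paths below are hypothetical): `deepspeed_load_checkpoint` expects the
# engine-style layout written by `deepspeed_engine.save_checkpoint(checkpoint_path)`, i.e. at least one
# `<checkpoint_path>/global_step<N>/` sub-directory, for example:
#
#     checkpoint-500/
#         global_step500/
#             ... engine model/optimizer state files ...
#
#     deepspeed_load_checkpoint(deepspeed_engine, "checkpoint-500", load_module_strict=True)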