#!/bin/bash
#
# src/train/train.sh
#
# Launches train/train.py through the DeepSpeed launcher on localhost
# devices 0,1,2,3, fine-tuning lmsys/vicuna-7b-v1.5 with LoRA enabled.
#
# Usage:
#   Run from the directory that should be importable by Python: the current
#   working directory is exported as PYTHONPATH (replacing any existing value).
#
# Outputs:
#   Checkpoints/results are written under "<script dir>/output/llemr_vicuna".

set -euo pipefail

# Assign first, export second, so a failing `pwd` is not masked by `export`.
PYTHONPATH=$(pwd)
export PYTHONPATH

# Directory containing this script; all paths below are resolved against it.
# NOTE(review): if this file lives at src/train/train.sh, the paths below
# resolve to src/train/train/train.py and src/train/train/*.json -- confirm
# that this matches the repository layout.
script_dir=$(dirname -- "$0")

# Arguments are kept in an array so that paths containing spaces survive
# intact and each group of flags can be commented.
train_args=(
  # DeepSpeed runtime configuration file.
  --deepspeed "${script_dir}/train/ds_zero2_no_offload.json"

  # Base model and what to train.
  --llm_pretrained_model_name_or_path lmsys/vicuna-7b-v1.5
  --train_type train_both
  --use_lora True
  --lora_r 128
  --lora_alpha 256
  --vision_hidden_size 1027
  --source joint_all

  # Numeric precision: bf16 on, fp16 off.
  --bf16 true
  --fp16 false

  # Data loading.
  --dataloader_pin_memory True
  --dataloader_num_workers 8
  --dataloader_persistent_workers True

  # Schedule and batch sizes.
  --output_dir "${script_dir}/output/llemr_vicuna"
  --num_train_epochs 5
  --per_device_train_batch_size 4
  --per_device_eval_batch_size 4
  --gradient_accumulation_steps 8

  # Evaluation and checkpointing.
  # NOTE(review): 0.05 is presumably interpreted by the trainer as a fraction
  # of total training steps rather than an absolute step count -- confirm
  # against the argument parser used in train.py.
  --evaluation_strategy steps
  --eval_steps 0.05
  --save_strategy steps
  --save_steps 0.05
  --save_total_limit 4
  --load_best_model_at_end True

  # Logging and optimisation.
  --run_name llemr_vicuna
  --learning_rate 1e-4
  --logging_steps 1
)

deepspeed --include localhost:0,1,2,3 "${script_dir}/train/train.py" \
  "${train_args[@]}"