This code is based on the Hugging Face transformers library; the training commands below are launched with accelerate.
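The commands assume a working install of this repository and of the accelerate launcher. A minimal setup sketch, assuming the repository follows the transformers source layout (as the examples/pytorch/... paths suggest); the editable install is an assumption, not something this README prescribes:

# Assumed setup; adjust to your environment.
pip install -e .          # editable install of this transformers-based repo (assumption)
pip install accelerate    # provides the accelerate launch command used below
accelerate config         # one-time configuration of single-/multi-GPU launching

With the environment in place, the first run trains meta-llama/Llama-2-7b-hf with the diffhead script, setting --intermediate_id=$n, label smoothing --label_smooth=$alpha, and loss weight --loss_w=$lambda: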
export output_dir=OUTPUT_DIR
export train_file=TRAIN_FILE
export n=23
export alpha=0.1
export lambda=0.1
accelerate launch examples/pytorch/language-modeling/run_clm_instruction_alpaca_diffhead.py \
--train_file=$train_file \
--output_dir=$output_dir \
--model_name_or_path=meta-llama/Llama-2-7b-hf \
--intermediate_id=$n \
--label_smooth=$alpha \
--loss_w=$lambda

Baseline instruction tuning with the plain run_clm_instruction_alpaca.py script (no diffhead flags):

export output_dir=OUTPUT_DIR
export train_file=TRAIN_FILE
accelerate launch examples/pytorch/language-modeling/run_clm_instruction_alpaca.py \
--train_file=$train_file \
--output_dir=$output_dir \
--model_name_or_path=meta-llama/Llama-2-7b-hf

Diffhead training with the auxiliary loss disabled (loss_w=0), keeping the same --intermediate_id and --label_smooth values:

export output_dir=OUTPUT_DIR
export train_file=TRAIN_FILE
export n=23
export alpha=0.1
export lambda=0
accelerate launch examples/pytorch/language-modeling/run_clm_instruction_alpaca_diffhead.py \
--train_file=$train_file \
--output_dir=$output_dir \
--model_name_or_path=meta-llama/Llama-2-7b-hf \
--intermediate_id=$n \
--label_smooth=$alpha \
--loss_w=$lambda
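The launches above use whatever defaults were stored by accelerate config. The launcher settings can also be pinned on the command line; the process count and precision below are placeholders, not values prescribed by this repo:

# Example: explicit launcher settings (placeholder values).
accelerate launch --num_processes=4 --mixed_precision=bf16 \
examples/pytorch/language-modeling/run_clm_instruction_alpaca_diffhead.py \
--train_file=$train_file \
--output_dir=$output_dir \
--model_name_or_path=meta-llama/Llama-2-7b-hf \
--intermediate_id=$n \
--label_smooth=$alpha \
--loss_w=$lambda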