# Launch fastchat/train/train_flant5.py through torchrun (one process per
# node, master port 9778), passing the model, data paths and training
# hyperparameters listed below.
#
# Every line except the last ends with a backslash so the shell reads the
# whole thing as ONE command. Without the continuations the shell would run
# torchrun with no training arguments and then try to execute each
# "--option value" line as a separate command.
#
# NOTE(review): --data_path is absolute (/workspace/...) while --output_dir
# is relative (./workspace/...); confirm that both locations are intended.
torchrun --nproc_per_node=1 --master_port=9778 fastchat/train/train_flant5.py \
  --model_name_or_path lmsys/fastchat-t5-3b-v1.0 \
  --data_path /workspace/processed_data.json \
  --bf16 True \
  --output_dir ./workspace/RYRMODEL \
  --num_train_epochs 3 \
  --per_device_train_batch_size 1 \
  --per_device_eval_batch_size 1 \
  --gradient_accumulation_steps 4 \
  --evaluation_strategy "no" \
  --save_strategy "steps" \
  --save_steps 300 \
  --save_total_limit 1 \
  --learning_rate 2e-5 \
  --weight_decay 0. \
  --warmup_ratio 0.03 \
  --lr_scheduler_type "cosine" \
  --logging_steps 1 \
  --fsdp "full_shard auto_wrap" \
  --fsdp_transformer_layer_cls_to_wrap T5Block \
  --tf32 True \
  --model_max_length 2048 \
  --preprocessed_path ./preprocessed_data/processed.json \
  --gradient_checkpointing True