# qwen3_vl_dense.yaml
# Forked from ByteDance-Seed/VeOmni.
# Model section. The child keys must be indented under `model`; at column 0
# they would parse as unrelated top-level keys and `model` itself as null.
model:
  # Model identifier in "<org>/<name>" form (presumably a Hugging Face Hub id
  # or an equivalent local path - confirm with the loader).
  model_path: Qwen/Qwen3-VL-8B-Instruct
  # Attention backend requested for the model.
  attn_implementation: flash_attention_2
# Data section. The child keys must be indented under `data`; at column 0
# they would parse as unrelated top-level keys and `data` itself as null.
data:
  train_path: sharegpt4v_pretrain
  data_type: conversation
  chat_template: qwen3vl
  # Sequence length cap (unit presumably tokens - confirm with the data loader).
  max_seq_len: 4096
  train_size: 80000000
  # NOTE(review): `mm_config` is assumed to be a child of `data`, with `fps`
  # and `use_audio_in_video` as its own children - confirm against the
  # argument schema of the consuming trainer.
  mm_config:
    fps: 2.0
    use_audio_in_video: false
# Training section. The child keys must be indented under `train`; at column 0
# they would parse as unrelated top-level keys and `train` itself as null.
train:
  output_dir: qwen3_vl_dense_sft

  # Parallelism / memory settings.
  data_parallel_mode: fsdp2
  enable_reentrant: false
  ulysses_parallel_size: 1
  init_device: meta

  # Experiment tracking (disabled; project/run names are kept for when it is
  # switched on).
  use_wandb: false
  wandb_project: qwen3_vl_dense
  wandb_name: qwen3_vl_dense

  # Padding-removal switches.
  # NOTE(review): `rmpad` is off while `rmpad_with_pos_ids` is on - presumably
  # two alternative modes; confirm they are not meant to be mutually exclusive.
  rmpad: false
  rmpad_with_pos_ids: true

  # NOTE(review): name suggests the vision tower stays trainable - confirm.
  freeze_vit: false

  # Optimisation schedule.
  lr: 1.0e-5
  lr_decay_style: cosine
  # NOTE(review): both `num_train_epochs` and `max_steps` are set; which one
  # ends the run first depends on the trainer - confirm.
  num_train_epochs: 2
  micro_batch_size: 1
  global_batch_size: 16
  max_steps: 500

  # Profiling window: steps 20 to 21, with shape recording on.
  # NOTE(review): profiling is enabled in this config - confirm that is
  # intended for regular runs.
  enable_profiling: true
  profile_start_step: 20
  profile_end_step: 21
  profile_record_shapes: true

  # Checkpointing.
  ckpt_manager: dcp
  save_hf_weights: false