Unverified Commit e611979a by hoshi-hiyouga Committed by GitHub

[perf] enable multiproc dataloader in sft trainer (#122)

- Without multiproc

Train 1/2: 1%|▍ | 20/3934 [01:38<5:14:50, 4.83s/it]

Avg GPU utilization: 55%

- With multiproc

Train 1/2: 1%|▍ | 20/3934 [01:00<2:57:09, 2.72s/it]

Avg GPU utilization: 95%
parent 52365618
......@@ -120,6 +120,8 @@ class FSDPSFTTrainer(object):
self.train_dataloader = DataLoader(dataset=self.train_dataset,
batch_size=config.data.train_batch_size,
sampler=self.train_sampler,
num_workers=8,
pin_memory=True,
drop_last=True)
self.val_sampler = DistributedSampler(self.val_dataset,
......@@ -130,6 +132,8 @@ class FSDPSFTTrainer(object):
self.val_dataloader = DataLoader(dataset=self.val_dataset,
batch_size=config.data.micro_batch_size,
sampler=self.val_sampler,
num_workers=8,
pin_memory=True,
drop_last=True)
def _build_model_optimizer(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment