2 years ago · 900fc9dec9
--- a/speech_lm/configs/pretrain.yaml
+++ b/speech_lm/configs/pretrain.yaml
@@ -48,10 +48,7 @@ dataloader:
 
															   batch_size: ${schedule.micro_batch_size}
														
 
															   num_workers: 4
														
 
															   collate_fn:
														
 
															-    _target_: transformers.DataCollatorWithPadding
														
 
															-    tokenizer: ${tokenizer}
														
 
															-    max_length: ${schedule.max_length}
														
 
															-    padding: max_length
														
 
															+    _target_: transformers.DefaultDataCollator
														
 
															 optimizer:
														
 
															   _target_: torch.optim.AdamW
														
--- a/speech_lm/train.py
+++ b/speech_lm/train.py
@@ -1,4 +1,3 @@
 
															-import logging
														
 
															 from pathlib import Path
														
 
															 import hydra
														
@@ -40,9 +39,11 @@ def train(
 
															     while global_step < cfg.schedule.max_steps:
														
 
															         for batch in dataloader:
														
 
															+            # Accumulate gradients
														
 
															             is_accumulating = (
														
 
															                 accumulate_steps % cfg.schedule.gradient_accumulation_steps != 0
														
 
															             )
														
 
															+            accumulate_steps += 1
														
 
															             # Train one step
														
 
															             with fabric.no_backward_sync(model, enabled=is_accumulating):
														
@@ -50,7 +51,6 @@ def train(
 
															                 fabric.backward(loss)
														
 
															             if is_accumulating:
														
 
															-                accumulate_steps += 1
														
 
															                 continue
														
 
															             # Perform gradient clipping