2 年前 · 118c27effc
--- a/dockerfile
+++ b/dockerfile
@@ -24,12 +24,10 @@ ENV SHELL=/usr/bin/zsh
 
															 # Setup flash-attn
														
 
															 RUN pip3 install --upgrade pip && \
														
 
															     pip3 install ninja packaging && \
														
 
															-    MAX_JOBS=4 pip3 install flash-attn --no-build-isolation
														
 
															+    MAX_JOBS=4 pip3 install git+https://github.com/facebookresearch/xformers.git@v0.0.22
														
 
															 # Project Env
														
 
															 WORKDIR /exp
														
 
															-COPY requirements.txt .
														
 
															-RUN pip3 install -r requirements.txt && pip3 install encodec --no-deps
														
 
															 COPY . .
														
 
															 RUN pip3 install -e .
														
--- a/fish_speech/configs/vqgan.yaml
+++ b/fish_speech/configs/vqgan.yaml
@@ -18,7 +18,7 @@ hop_length: 256
 
															 num_mels: 80
														
 
															 n_fft: 1024
														
 
															 win_length: 1024
														
 
															-segment_size: 512
														
 
															+segment_size: 256
														
 
															 # Dataset Configuration
														
 
															 train_dataset:
														
@@ -39,7 +39,7 @@ data:
 
															   train_dataset: ${train_dataset}
														
 
															   val_dataset: ${val_dataset}
														
 
															   num_workers: 4
														
 
															-  batch_size: 16
														
 
															+  batch_size: 32
														
 
															   val_batch_size: 4
														
 
															 # Model Configuration
														
@@ -48,7 +48,7 @@ model:
 
															   sample_rate: ${sample_rate}
														
 
															   hop_length: ${hop_length}
														
 
															   segment_size: 8192
														
 
															-  freeze_hifigan: true
														
 
															+  freeze_hifigan: false
														
 
															   downsample:
														
 
															     _target_: fish_speech.models.vq_diffusion.lit_module.ConvDownSample