|
|
@@ -12,24 +12,36 @@ from tools.file import AUDIO_EXTENSIONS, list_files
|
|
|
|
|
|
# Linear amplitude equivalent of -50 dB (amplitude = 10 ** (dB / 20)); RMS
# frames whose level does not exceed this value are treated as silence.
threshold = 10.0 ** (-50.0 / 20.0)
|
|
|
|
|
|
-
|
|
|
def process(file):
    """Normalise the leading and trailing silence of one audio file in place.

    The file is loaded, down-mixed to mono when it has more than one channel,
    and its per-frame RMS level (2048-sample frames, 512-sample hop) is
    compared against the module-level ``threshold``:

    * If the silence after the last loud frame lasts 0.3 s or less, zeros are
      appended so that the trailing silence totals a random 0.3-0.7 s.
    * If the silence before the first loud frame exceeds 0.02 s, the start is
      cut so that only 0.02 s of leading silence remains.

    When no frame is louder than ``threshold`` the samples are neither padded
    nor trimmed. The (possibly down-mixed) audio is always written back to
    the same path.

    Args:
        file: Path of the audio file. It is converted with ``str()`` for both
            loading and saving, and the file is overwritten.
    """
    hop_length = 512
    min_tail_seconds = 0.3
    max_tail_seconds = 0.7
    max_lead_seconds = 0.02

    waveform, sample_rate = torchaudio.load(str(file), backend="sox")

    # Average the channels so the loudness analysis sees a single signal.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    loudness = librosa.feature.rms(
        y=waveform.numpy().squeeze(),
        frame_length=2048,
        hop_length=hop_length,
        center=True,
    )[0]

    # Find the loud frames once instead of relying on a loop variable that
    # leaks out of a ``for`` loop: that variable is unbound when the loop body
    # never runs (single-frame input) and points at an arbitrary frame when
    # nothing exceeds the threshold.
    loud_frames = [idx for idx, level in enumerate(loudness) if level > threshold]

    if loud_frames:
        first_loud = loud_frames[0]
        last_loud = loud_frames[-1]

        end_silent_time = (len(loudness) - last_loud) * hop_length / sample_rate
        if end_silent_time <= min_tail_seconds:
            # Only add the difference, so the total trailing silence (existing
            # plus padding) lands in the 0.3-0.7 s range.
            random_time = (
                random.uniform(min_tail_seconds, max_tail_seconds) - end_silent_time
            )
            waveform = F.pad(
                waveform, (0, int(random_time * sample_rate)), mode="constant", value=0
            )

        start_silent_time = first_loud * hop_length / sample_rate
        if start_silent_time > max_lead_seconds:
            # Keep a short lead-in rather than cutting right at the onset.
            trim_samples = int((start_silent_time - max_lead_seconds) * sample_rate)
            waveform = waveform[:, trim_samples:]

    torchaudio.save(uri=str(file), src=waveform, sample_rate=sample_rate)
|
|
|
|
|
|
|