
Commit 8ed53a4

alancucki authored and nv-kkudrynski committed
[Jasper/PyT, QuartzNet/PyT] Fix Ada L40 on 23.06 base container
1 parent 54e2fb4 commit 8ed53a4

File tree

8 files changed, +10 -10 lines changed

PyTorch/SpeechRecognition/Jasper/Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ COPY requirements.txt .
 RUN if [[ ! -z "$(command -v conda)" ]]; then conda install -y pyyaml==5.4.1; fi
 RUN pip install --disable-pip-version-check -U -r requirements.txt
 
-RUN pip install --force-reinstall --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110==1.9.0
+RUN pip install --force-reinstall --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110==1.27.0
 
 # Copy rest of files
 COPY . .
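
The only functional change here is the nvidia-dali-cuda110 pin moving from 1.9.0 to 1.27.0 so the wheel matches the 23.06 base container. A minimal sketch for confirming which DALI build ended up inside the image, assuming nvidia.dali exposes __version__ as in recent releases (the check itself is not part of the commit):

import nvidia.dali as dali

# Hypothetical sanity check run inside the built container; the expected
# prefix mirrors the pin in the Dockerfile above.
assert dali.__version__.startswith("1.27"), dali.__version__
print("DALI", dali.__version__)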

PyTorch/SpeechRecognition/Jasper/common/audio.py

Lines changed: 4 additions & 3 deletions

@@ -45,7 +45,7 @@ def __init__(self, filename, target_sr=None, int_values=False, offset=0,
                  duration=0, trim=False, trim_db=60):
         """Create audio segment from samples.
 
-        Samples are convert float32 internally, with int scaled to [-1, 1].
+        Samples are converted to float32 internally, with int scaled to [-1, 1].
         Load a file supported by librosa and return as an AudioSegment.
         :param filename: path of file to load
         :param target_sr: the desired sample rate
@@ -67,10 +67,11 @@ def __init__(self, filename, target_sr=None, int_values=False, offset=0,
 
         samples = self._convert_samples_to_float32(samples)
         if target_sr is not None and target_sr != sample_rate:
-            samples = librosa.core.resample(samples, sample_rate, target_sr)
+            samples = librosa.resample(samples, orig_sr=sample_rate,
+                                       target_sr=target_sr)
             sample_rate = target_sr
         if trim:
-            samples, _ = librosa.effects.trim(samples, trim_db)
+            samples, _ = librosa.effects.trim(samples, top_db=trim_db)
         self._samples = samples
         self._sample_rate = sample_rate
         if self._samples.ndim >= 2:
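
These edits track newer librosa releases, which expect resample rates and trim thresholds as keyword arguments (positional use is deprecated in 0.9 and removed later). A minimal standalone sketch of the updated call pattern, using an illustrative file name and target rate rather than anything from the repo:

import librosa

# Load at the native rate; sr=None avoids an implicit resample to 22050 Hz.
samples, sample_rate = librosa.load("example.wav", sr=None, mono=True)

target_sr = 16000
if target_sr != sample_rate:
    samples = librosa.resample(samples, orig_sr=sample_rate, target_sr=target_sr)
    sample_rate = target_sr

# Trim leading/trailing silence below 60 dB, keyword form as in the diff.
samples, _ = librosa.effects.trim(samples, top_db=60)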

PyTorch/SpeechRecognition/Jasper/common/features.py

Lines changed: 1 addition & 1 deletion

@@ -233,7 +233,7 @@ def __init__(self, spec_augment=None, cutout_augment=None,
         window_tensor = window_fn(self.win_length,
                                   periodic=False) if window_fn else None
         filterbanks = torch.tensor(
-            librosa.filters.mel(sample_rate, self.n_fft, n_mels=n_filt,
+            librosa.filters.mel(sr=sample_rate, n_fft=self.n_fft, n_mels=n_filt,
                                 fmin=lowfreq, fmax=highfreq),
             dtype=torch.float).unsqueeze(0)
         # torchscript
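
The same keyword-argument migration applies to the mel filterbank construction. A small sketch of the pattern with illustrative values (16 kHz audio, 512-point FFT, 64 mel bins) rather than the model's actual feature config:

import librosa
import torch

# librosa.filters.mel returns an (n_mels, 1 + n_fft // 2) weight matrix.
mel = librosa.filters.mel(sr=16000, n_fft=512, n_mels=64, fmin=0.0, fmax=8000.0)
filterbanks = torch.tensor(mel, dtype=torch.float).unsqueeze(0)  # shape (1, 64, 257)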

PyTorch/SpeechRecognition/Jasper/requirements.txt

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 inflect==5.3.0
 ipdb
-librosa==0.8.0
+librosa==0.9.0
 pandas==1.5.2
 pyyaml>=5.4
 soundfile

PyTorch/SpeechRecognition/Jasper/train.py

Lines changed: 1 addition & 1 deletion

@@ -54,7 +54,7 @@ def parse_args():
     training.add_argument('--amp', '--fp16', action='store_true', default=False,
                           help='Use pytorch native mixed precision training')
     training.add_argument('--seed', default=42, type=int, help='Random seed')
-    training.add_argument('--local_rank', default=os.getenv('LOCAL_RANK', 0),
+    training.add_argument('--local_rank', '--local-rank', default=os.getenv('LOCAL_RANK', 0),
                           type=int, help='GPU id used for distributed training')
     training.add_argument('--pre_allocate_range', default=None, type=int, nargs=2,
                           help='Warmup with batches of length [min, max] before training')
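
Adding the dashed alias keeps parse_args() compatible with newer torch.distributed launchers, which pass --local-rank instead of --local_rank. A self-contained sketch of that aliasing behavior (argument names as in the diff, but outside the repo's full parser setup):

import argparse
import os

parser = argparse.ArgumentParser()
training = parser.add_argument_group('training setup')
training.add_argument('--local_rank', '--local-rank',
                      default=os.getenv('LOCAL_RANK', 0), type=int,
                      help='GPU id used for distributed training')

# Both spellings land in the same destination, args.local_rank.
assert parser.parse_args(['--local-rank', '3']).local_rank == 3
assert parser.parse_args(['--local_rank', '3']).local_rank == 3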

PyTorch/SpeechRecognition/Jasper/utils/preprocessing_utils.py

Lines changed: 0 additions & 1 deletion

@@ -15,7 +15,6 @@
 #!/usr/bin/env python
 import os
 import multiprocessing
-import librosa
 import functools
 
 import sox

PyTorch/SpeechRecognition/QuartzNet/Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ COPY requirements.txt .
 RUN if [[ ! -z "$(command -v conda)" ]]; then conda install -y pyyaml==5.4.1; fi
 RUN pip install --disable-pip-version-check -U -r requirements.txt
 
-RUN pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110==1.9.0
+RUN pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110==1.27.0
 
 # Copy rest of files
 COPY . .

PyTorch/SpeechRecognition/QuartzNet/train.py

Lines changed: 1 addition & 1 deletion

@@ -56,7 +56,7 @@ def parse_args():
     training.add_argument('--amp', '--fp16', action='store_true', default=False,
                           help='Use pytorch native mixed precision training')
     training.add_argument('--seed', default=None, type=int, help='Random seed')
-    training.add_argument('--local_rank', default=os.getenv('LOCAL_RANK', 0), type=int,
+    training.add_argument('--local_rank', '--local-rank', default=os.getenv('LOCAL_RANK', 0), type=int,
                           help='GPU id used for distributed training')
     training.add_argument('--pre_allocate_range', default=None, type=int, nargs=2,
                           help='Warmup with batches of length [min, max] before training')

0 commit comments