From ce8ff7fd354dbb7b8d3455045b05e00e95227f69 Mon Sep 17 00:00:00 2001 From: JustinSabatini <49084073+JustinSabatini@users.noreply.github.com> Date: Fri, 14 Mar 2025 15:35:15 -0400 Subject: [PATCH] Update tts_realtime_demo.ipynb Fix ESPnet2-TTS Colab Notebook: Update dependencies & ensure correct synthesis setup This commit addresses three key issues in the ESPnet2-TTS realtime demonstration notebook: - Fixed outdated SciPy import: Updated the installation step to use the 202412 version of ESPnet2, resolving an outdated kaiser import issue. - Ensured nltk dataset availability: Added a condition in the "Model Setup" section to automatically download the required dataset before English synthesis. - Resolved Japanese synthesis dependency issue: Introduced an additional condition to ensure pyopenjtalk is installed before running Japanese synthesis. These fixes improve first-time usability by preventing common dependency errors in the notebook. --- ESPnet2/Demo/TTS/tts_realtime_demo.ipynb | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/ESPnet2/Demo/TTS/tts_realtime_demo.ipynb b/ESPnet2/Demo/TTS/tts_realtime_demo.ipynb index 21bad3bb..6b0b6770 100644 --- a/ESPnet2/Demo/TTS/tts_realtime_demo.ipynb +++ b/ESPnet2/Demo/TTS/tts_realtime_demo.ipynb @@ -1,3 +1,4 @@ + { "nbformat": 4, "nbformat_minor": 0, @@ -56,7 +57,7 @@ }, "source": [ "# NOTE: pip shows imcompatible errors due to preinstalled libraries but you do not need to care\n", - "!pip install -q espnet==202308 pypinyin==0.44.0 parallel_wavegan==0.5.4 gdown==4.4.0 espnet_model_zoo\n" + "!pip install -q espnet==202412 pypinyin==0.44.0 parallel_wavegan==0.5.4 gdown==4.4.0 espnet_model_zoo\n" ], "execution_count": null, "outputs": [] @@ -192,7 +193,13 @@ " # Only for VITS\n", " noise_scale=0.333,\n", " noise_scale_dur=0.333,\n", - ")" + ")\n", + "\n", + "if lang == \"English\":\n", + " import nltk\n", + " nltk.download('averaged_perceptron_tagger_eng')\n", + "elif lang == \"Japanese\":\n", +
" !pip install -q pyopenjtalk" ], "execution_count": null, "outputs": []