Spaces:

JyuViole
/

E2-F5-TTS-RUS

Running

App Files Community

JyuViole committed on Sep 8, 2025

Commit

f258760

verified ·

1 Parent(s): f74c39c

Upload app.py

Browse files

Files changed (1) hide show

app.py +9 -10

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# ruff: noqa: E402
 import gc
 import json
 import re
@@ -58,10 +57,10 @@ DEFAULT_TTS_MODEL_CFG = [
     json.dumps(dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)),
 ]
-# Конфигурация для F5-TTS-Russian (русский)
 RUSSIAN_TTS_MODEL_CFG = [
-    "hf://hotstone228/F5-TTS-Russian/model_last.safetensors",
-    "hf://hotstone228/F5-TTS-Russian/vocab.txt",
     json.dumps(dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)),
 ]
@@ -102,9 +101,9 @@ except Exception as e:
 try:
     F5TTS_russian_model = load_f5tts_russian()
-    print("F5-TTS-Russian loaded successfully.")
 except Exception as e:
-    print(f"Failed to load F5-TTS-Russian: {str(e)}")
     F5TTS_russian_model = None
 E2TTS_ema_model = load_e2tts() if USING_SPACES else None
@@ -159,7 +158,7 @@ def infer(
     # Выбор модели в зависимости от языка
     if language == "ru":
         if F5TTS_russian_model is None:
-            gr.Warning("F5-TTS-Russian model failed to load. Cannot generate Russian audio.")
             return None, None, ref_text
         ema_model = F5TTS_russian_model
     else:
@@ -215,7 +214,7 @@ with gr.Blocks() as app_credits:
 * [mrfakename](https://github.com/fakerybakery) for the original [online demo](https://huggingface.co/spaces/mrfakename/E2-F5-TTS)
 * [RootingInLoad](https://github.com/RootingInLoad) for initial chunk generation and podcast app exploration
 * [jpgallegoar](https://github.com/jpgallegoar) for multiple speech-type generation & voice chat
-* [hotstone228](https://huggingface.co/hotstone228) for the [F5-TTS-Russian](https://huggingface.co/hotstone228/F5-TTS-Russian) model
 """)
 with gr.Blocks() as app_tts:
@@ -710,8 +709,8 @@ with gr.Blocks() as app:
 This is {"a local web UI for [F5 TTS](https://github.com/SWivid/F5-TTS)" if not USING_SPACES else "an online demo for [F5-TTS](https://github.com/SWivid/F5-TTS)"} with advanced batch processing support. This app supports the following TTS models:
 * [F5-TTS](https://arxiv.org/abs/2410.06885) (A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching) for English and Chinese
 * [E2 TTS](https://arxiv.org/abs/2406.18009) (Embarrassingly Easy Fully Non-Autoregressive Zero-Shot TTS) for English and Chinese
-* [F5-TTS-Russian](https://huggingface.co/hotstone228/F5-TTS-Russian) by [hotstone228](https://huggingface.co/hotstone228) for Russian
-The checkpoints support English, Chinese, and Russian (via F5-TTS-Russian, licensed under CC-BY-NC-SA-4.0).
 If you're having issues, try converting your reference audio to WAV or MP3, clipping it to 12s with ✂ in the bottom right corner (otherwise might have non-optimal auto-trimmed result).
 **NOTE: Reference text will be automatically transcribed with Whisper if not provided. For best results, keep your reference clips short (<12s). Ensure the audio is fully uploaded before generating.**
 """

 import gc
 import json
 import re
     json.dumps(dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)),
 ]
+# Конфигурация для F5-TTS_RUSSIAN (русский)
 RUSSIAN_TTS_MODEL_CFG = [
+    "hf://Misha24-10/F5-TTS_RUSSIAN/F5TTS_v1_Base/model.safetensors",
+    "hf://Misha24-10/F5-TTS_RUSSIAN/vocab.txt",
     json.dumps(dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)),
 ]
 try:
     F5TTS_russian_model = load_f5tts_russian()
+    print("F5-TTS_RUSSIAN loaded successfully.")
 except Exception as e:
+    print(f"Failed to load F5-TTS_RUSSIAN: {str(e)}")
     F5TTS_russian_model = None
 E2TTS_ema_model = load_e2tts() if USING_SPACES else None
     # Выбор модели в зависимости от языка
     if language == "ru":
         if F5TTS_russian_model is None:
+            gr.Warning("F5-TTS_RUSSIAN model failed to load. Cannot generate Russian audio.")
             return None, None, ref_text
         ema_model = F5TTS_russian_model
     else:
 * [mrfakename](https://github.com/fakerybakery) for the original [online demo](https://huggingface.co/spaces/mrfakename/E2-F5-TTS)
 * [RootingInLoad](https://github.com/RootingInLoad) for initial chunk generation and podcast app exploration
 * [jpgallegoar](https://github.com/jpgallegoar) for multiple speech-type generation & voice chat
+* [Misha24-10](https://huggingface.co/Misha24-10) for the [F5-TTS_RUSSIAN](https://huggingface.co/Misha24-10/F5-TTS_RUSSIAN) model
 """)
 with gr.Blocks() as app_tts:
 This is {"a local web UI for [F5 TTS](https://github.com/SWivid/F5-TTS)" if not USING_SPACES else "an online demo for [F5-TTS](https://github.com/SWivid/F5-TTS)"} with advanced batch processing support. This app supports the following TTS models:
 * [F5-TTS](https://arxiv.org/abs/2410.06885) (A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching) for English and Chinese
 * [E2 TTS](https://arxiv.org/abs/2406.18009) (Embarrassingly Easy Fully Non-Autoregressive Zero-Shot TTS) for English and Chinese
+* [F5-TTS_RUSSIAN](https://huggingface.co/Misha24-10/F5-TTS_RUSSIAN) by [Misha24-10](https://huggingface.co/Misha24-10) for Russian
+The checkpoints support English, Chinese, and Russian (via F5-TTS_RUSSIAN, licensed under CC-BY-NC-SA-4.0).
 If you're having issues, try converting your reference audio to WAV or MP3, clipping it to 12s with ✂ in the bottom right corner (otherwise might have non-optimal auto-trimmed result).
 **NOTE: Reference text will be automatically transcribed with Whisper if not provided. For best results, keep your reference clips short (<12s). Ensure the audio is fully uploaded before generating.**
 """