JyuViole committed on
Commit
f258760
·
verified ·
1 Parent(s): f74c39c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # ruff: noqa: E402
2
  import gc
3
  import json
4
  import re
@@ -58,10 +57,10 @@ DEFAULT_TTS_MODEL_CFG = [
58
  json.dumps(dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)),
59
  ]
60
 
61
- # Конфигурация для F5-TTS-Russian (русский)
62
  RUSSIAN_TTS_MODEL_CFG = [
63
- "hf://hotstone228/F5-TTS-Russian/model_last.safetensors",
64
- "hf://hotstone228/F5-TTS-Russian/vocab.txt",
65
  json.dumps(dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)),
66
  ]
67
 
@@ -102,9 +101,9 @@ except Exception as e:
102
 
103
  try:
104
  F5TTS_russian_model = load_f5tts_russian()
105
- print("F5-TTS-Russian loaded successfully.")
106
  except Exception as e:
107
- print(f"Failed to load F5-TTS-Russian: {str(e)}")
108
  F5TTS_russian_model = None
109
 
110
  E2TTS_ema_model = load_e2tts() if USING_SPACES else None
@@ -159,7 +158,7 @@ def infer(
159
  # Выбор модели в зависимости от языка
160
  if language == "ru":
161
  if F5TTS_russian_model is None:
162
- gr.Warning("F5-TTS-Russian model failed to load. Cannot generate Russian audio.")
163
  return None, None, ref_text
164
  ema_model = F5TTS_russian_model
165
  else:
@@ -215,7 +214,7 @@ with gr.Blocks() as app_credits:
215
  * [mrfakename](https://github.com/fakerybakery) for the original [online demo](https://huggingface.co/spaces/mrfakename/E2-F5-TTS)
216
  * [RootingInLoad](https://github.com/RootingInLoad) for initial chunk generation and podcast app exploration
217
  * [jpgallegoar](https://github.com/jpgallegoar) for multiple speech-type generation & voice chat
218
- * [hotstone228](https://huggingface.co/hotstone228) for the [F5-TTS-Russian](https://huggingface.co/hotstone228/F5-TTS-Russian) model
219
  """)
220
 
221
  with gr.Blocks() as app_tts:
@@ -710,8 +709,8 @@ with gr.Blocks() as app:
710
  This is {"a local web UI for [F5 TTS](https://github.com/SWivid/F5-TTS)" if not USING_SPACES else "an online demo for [F5-TTS](https://github.com/SWivid/F5-TTS)"} with advanced batch processing support. This app supports the following TTS models:
711
  * [F5-TTS](https://arxiv.org/abs/2410.06885) (A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching) for English and Chinese
712
  * [E2 TTS](https://arxiv.org/abs/2406.18009) (Embarrassingly Easy Fully Non-Autoregressive Zero-Shot TTS) for English and Chinese
713
- * [F5-TTS-Russian](https://huggingface.co/hotstone228/F5-TTS-Russian) by [hotstone228](https://huggingface.co/hotstone228) for Russian
714
- The checkpoints support English, Chinese, and Russian (via F5-TTS-Russian, licensed under CC-BY-NC-SA-4.0).
715
  If you're having issues, try converting your reference audio to WAV or MP3, clipping it to 12s with ✂ in the bottom right corner (otherwise might have non-optimal auto-trimmed result).
716
  **NOTE: Reference text will be automatically transcribed with Whisper if not provided. For best results, keep your reference clips short (<12s). Ensure the audio is fully uploaded before generating.**
717
  """
 
 
1
  import gc
2
  import json
3
  import re
 
57
  json.dumps(dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)),
58
  ]
59
 
60
+ # Конфигурация для F5-TTS_RUSSIAN (русский)
61
  RUSSIAN_TTS_MODEL_CFG = [
62
+ "hf://Misha24-10/F5-TTS_RUSSIAN/F5TTS_v1_Base/model.safetensors",
63
+ "hf://Misha24-10/F5-TTS_RUSSIAN/vocab.txt",
64
  json.dumps(dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)),
65
  ]
66
 
 
101
 
102
  try:
103
  F5TTS_russian_model = load_f5tts_russian()
104
+ print("F5-TTS_RUSSIAN loaded successfully.")
105
  except Exception as e:
106
+ print(f"Failed to load F5-TTS_RUSSIAN: {str(e)}")
107
  F5TTS_russian_model = None
108
 
109
  E2TTS_ema_model = load_e2tts() if USING_SPACES else None
 
158
  # Выбор модели в зависимости от языка
159
  if language == "ru":
160
  if F5TTS_russian_model is None:
161
+ gr.Warning("F5-TTS_RUSSIAN model failed to load. Cannot generate Russian audio.")
162
  return None, None, ref_text
163
  ema_model = F5TTS_russian_model
164
  else:
 
214
  * [mrfakename](https://github.com/fakerybakery) for the original [online demo](https://huggingface.co/spaces/mrfakename/E2-F5-TTS)
215
  * [RootingInLoad](https://github.com/RootingInLoad) for initial chunk generation and podcast app exploration
216
  * [jpgallegoar](https://github.com/jpgallegoar) for multiple speech-type generation & voice chat
217
+ * [Misha24-10](https://huggingface.co/Misha24-10) for the [F5-TTS_RUSSIAN](https://huggingface.co/Misha24-10/F5-TTS_RUSSIAN) model
218
  """)
219
 
220
  with gr.Blocks() as app_tts:
 
709
  This is {"a local web UI for [F5 TTS](https://github.com/SWivid/F5-TTS)" if not USING_SPACES else "an online demo for [F5-TTS](https://github.com/SWivid/F5-TTS)"} with advanced batch processing support. This app supports the following TTS models:
710
  * [F5-TTS](https://arxiv.org/abs/2410.06885) (A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching) for English and Chinese
711
  * [E2 TTS](https://arxiv.org/abs/2406.18009) (Embarrassingly Easy Fully Non-Autoregressive Zero-Shot TTS) for English and Chinese
712
+ * [F5-TTS_RUSSIAN](https://huggingface.co/Misha24-10/F5-TTS_RUSSIAN) by [Misha24-10](https://huggingface.co/Misha24-10) for Russian
713
+ The checkpoints support English, Chinese, and Russian (via F5-TTS_RUSSIAN, licensed under CC-BY-NC-SA-4.0).
714
  If you're having issues, try converting your reference audio to WAV or MP3, clipping it to 12s with ✂ in the bottom right corner (otherwise might have non-optimal auto-trimmed result).
715
  **NOTE: Reference text will be automatically transcribed with Whisper if not provided. For best results, keep your reference clips short (<12s). Ensure the audio is fully uploaded before generating.**
716
  """