Spaces:

prithivMLmods
/

Vision-to-VibeVoice-en

Running on Zero

prithivMLmods committed on Dec 5, 2025

Commit

78bb69e

verified ·

1 Parent(s): e6261e6

update app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -146,7 +146,7 @@ tts_processor = VibeVoiceStreamingProcessor.from_pretrained(TTS_MODEL_PATH)
 tts_model = VibeVoiceStreamingForConditionalGenerationInference.from_pretrained(
     TTS_MODEL_PATH,
     torch_dtype=torch.float16,
-    device_map="cpu",
     attn_implementation="sdpa",
 )
 tts_model.eval()
@@ -350,7 +350,7 @@ image_examples = [
     ["Extract the contents. [page].", "examples/2.jpg"],
 ]
-with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     gr.Markdown("# **Vision-to-VibeVoice-en**", elem_id="main-title")
     with gr.Row():

 tts_model = VibeVoiceStreamingForConditionalGenerationInference.from_pretrained(
     TTS_MODEL_PATH,
     torch_dtype=torch.float16,
+    device_map="cuda",
     attn_implementation="sdpa",
 )
 tts_model.eval()
     ["Extract the contents. [page].", "examples/2.jpg"],
 ]
+with gr.Blocks() as demo:
     gr.Markdown("# **Vision-to-VibeVoice-en**", elem_id="main-title")
     with gr.Row():