Spaces:

jblast94
/

voice-agent-ui

Sleeping

App Files Community

jblast94 committed on Nov 13

Commit

f60bb93

verified ·

1 Parent(s): 6310325

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -148

app.py CHANGED Viewed

@@ -2,163 +2,62 @@ import gradio as gr
 import os
 import requests
 import base64
-from typing import List, Optional, Tuple, Any
-# Environment-driven configuration (HF + local dev)
-HF_ORCHESTRATOR_URL = os.getenv("HF_ORCHESTRATOR_URL")
-HF_ORCHESTRATOR_WEBHOOK_SECRET = os.getenv("HF_ORCHESTRATOR_WEBHOOK_SECRET")
-# Optional for future Supabase direct reads (allowed: anon key only)
-HF_SUPABASE_URL = os.getenv("HF_SUPABASE_URL")
-HF_SUPABASE_ANON_KEY = os.getenv("HF_SUPABASE_ANON_KEY")
-# Local dev fallback so this app works outside HF:
-# You can run a compatible orchestrator on localhost and point here.
-LOCAL_ORCHESTRATOR_FALLBACK = os.getenv("LOCAL_ORCHESTRATOR_FALLBACK", "http://localhost:5678/webhook/voice-agent")
-def _get_orchestrator_url() -> str:
-    """
-    Resolve orchestrator URL.
-    Precedence:
-    - HF_ORCHESTRATOR_URL (HF Space / production)
-    - LOCAL_ORCHESTRATOR_FALLBACK (local dev)
-    """
-    return HF_ORCHESTRATOR_URL or LOCAL_ORCHESTRATOR_FALLBACK
-def _post_orchestrator(payload: dict) -> dict:
-    url = _get_orchestrator_url()
-    try:
-        resp = requests.post(url, json=payload, timeout=60)
-        resp.raise_for_status()
-        return resp.json()
-    except requests.exceptions.HTTPError as e:
-        # Surface orchestrator error payload when available
-        try:
-            data = resp.json()
-        except Exception:
-            data = {"error": resp.text}
-        return {"error": f"orchestrator_http_error", "details": str(e), "payload": data}
-    except Exception as e:
-        return {"error": "orchestrator_unreachable", "details": str(e)}
-def _build_payload(
-    conversation_id: Optional[str],
-    audio_b64: Optional[str],
-    text: Optional[str],
-) -> dict:
-    return {
-        "secret": HF_ORCHESTRATOR_WEBHOOK_SECRET,
-        "conversation_id": conversation_id,
-        "audio": audio_b64,
-        "text": text,
-    }
-def chat(message: str, history: List[List[str]], state: dict) -> Tuple[List[List[str]], dict]:
-    """
-    Text-only interaction with orchestrator.
-    - Uses orchestrator contract defined in docs/voice-agent-mcp-architecture.md.
-    - Threads conversation_id from previous turns via state.
-    """
-    conversation_id = state.get("conversation_id")
-    payload = _build_payload(conversation_id=conversation_id, audio_b64=None, text=message)
-    data = _post_orchestrator(payload)
-    if "error" in data:
-        history = history + [[message, f"Error: {data.get('error')}"]]
-        return history, state
-    # Update conversation_id for continuity
-    new_conversation_id = data.get("conversation_id") or conversation_id
-    state["conversation_id"] = new_conversation_id
-    transcript = data.get("transcript_text") or ""
     assistant = data.get("assistant_response_text") or ""
-    reply = assistant or "(no response)"
-    if transcript:
-        # show both transcript (if any) and assistant message
-        history = history + [[f"You: {message}", f"ASR: {transcript}\n\nAssistant: {reply}"]]
-    else:
-        history = history + [[message, reply]]
-    return history, state
-def voice(audio_path: str, history: List[List[str]], state: dict) -> Tuple[List[List[str]], Any, dict]:
-    """
-    Voice interaction:
-    - Reads recorded audio file.
-    - Encodes to base64 and sends as `audio` to orchestrator.
-    - Threads conversation_id via state.
-    - Expects orchestrator to handle STT, LLM, TTS, and return contract fields.
-    """
-    conversation_id = state.get("conversation_id")
     audio_b64 = None
     if audio_path:
-        try:
-            with open(audio_path, "rb") as f:
-                audio_b64 = base64.b64encode(f.read()).decode("utf-8")
-        except Exception as e:
-            history = history + [["", f"Error reading audio: {e}"]]
-            return history, None, state
-    if not audio_b64:
-        history = history + [["", "Error: No audio captured"]]
-        return history, None, state
-    payload = _build_payload(conversation_id=conversation_id, audio_b64=audio_b64, text=None)
-    data = _post_orchestrator(payload)
-    if "error" in data:
-        history = history + [["", f"Error: {data.get('error')}"]]
-        return history, None, state
-    # Update conversation_id
-    new_conversation_id = data.get("conversation_id") or conversation_id
-    state["conversation_id"] = new_conversation_id
-    transcript = data.get("transcript_text") or ""
     assistant = data.get("assistant_response_text") or ""
-    reply = assistant or "(no response)"
-    # Append transcript + assistant message
-    user_side = f"(voice) {transcript}" if transcript else "(voice input)"
-    history = history + [[user_side, reply]]
-    # Orchestrator may provide tts_audio_url (URL or data URI) which HF/Gradio can play if wired.
-    # For now we just ignore it in UI; audio playback can be handled by a custom component.
-    return history, None, state
 with gr.Blocks() as demo:
-    gr.Markdown("# Voice Agent (HF Space ↔ n8n Orchestrator)")
-    # Shared state for conversation_id and any future metadata
-    state = gr.State({"conversation_id": None})
-    chatbox = gr.Chatbot(label="Conversation")
-    msg = gr.Textbox(label="Type a message")
     send_btn = gr.Button("Send")
-    audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Talk")
-    audio_btn = gr.Button("Send Voice")
-    # Text chat wired to orchestrator
-    send_btn.click(
-        fn=chat,
-        inputs=[msg, chatbox, state],
-        outputs=[chatbox, state],
-    )
-    # Voice chat wired to orchestrator
-    audio_btn.click(
-        fn=voice,
-        inputs=[audio_in, chatbox, state],
-        outputs=[chatbox, audio_in, state],
-    )
 if __name__ == "__main__":
-    # Works for both local dev and HF Space
     demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
 import requests
 import base64
+ORCHESTRATOR_URL = os.getenv("HF_ORCHESTRATOR_URL", "")
+WEBHOOK_SECRET = os.getenv("HF_ORCHESTRATOR_WEBHOOK_SECRET", "")
+BASIC_USER = os.getenv("N8N_BASIC_AUTH_USER", "")
+BASIC_PASS = os.getenv("N8N_BASIC_AUTH_PASSWORD", "")
+def _headers():
+    headers = {"x-webhook-secret": WEBHOOK_SECRET, "Content-Type": "application/json"}
+    if BASIC_USER and BASIC_PASS:
+        import base64 as b64
+        token = b64.b64encode(f"{BASIC_USER}:{BASIC_PASS}".encode()).decode()
+        headers["Authorization"] = f"Basic {token}"
+    return headers
+def chat_send(message, history, conversation_id):
+    payload = {"secret": WEBHOOK_SECRET, "conversation_id": conversation_id, "text": message}
+    r = requests.post(f"{ORCHESTRATOR_URL}/voice-agent", json=payload, headers=_headers())
+    data = r.json()
+    if r.status_code != 200 or data.get("error"):
+        return history + [[message, data.get("error", "Error")]], conversation_id, ""
     assistant = data.get("assistant_response_text") or ""
+    convo = data.get("conversation_id") or conversation_id
+    audio_html = ""
+    if data.get("tts_audio_url"):
+        audio_html = f"<audio controls src=\"{data['tts_audio_url']}\"></audio>"
+    return history + [[message, assistant]], convo, audio_html
+def voice_send(audio_path, history, conversation_id):
     audio_b64 = None
     if audio_path:
+        with open(audio_path, "rb") as f:
+            audio_b64 = base64.b64encode(f.read()).decode("utf-8")
+    payload = {"secret": WEBHOOK_SECRET, "conversation_id": conversation_id, "audio": audio_b64}
+    r = requests.post(f"{ORCHESTRATOR_URL}/voice-agent", json=payload, headers=_headers())
+    data = r.json()
+    if r.status_code != 200 or data.get("error"):
+        return history + [["", data.get("error", "Error")]], conversation_id, ""
+    user_text = data.get("transcript_text", "")
     assistant = data.get("assistant_response_text") or ""
+    convo = data.get("conversation_id") or conversation_id
+    audio_html = ""
+    if data.get("tts_audio_url"):
+        audio_html = f"<audio controls src=\"{data['tts_audio_url']}\"></audio>"
+    return history + [[user_text, assistant]], convo, audio_html
 with gr.Blocks() as demo:
+    gr.Markdown("# Voice Agent")
+    chatbox = gr.Chatbot()
+    msg = gr.Textbox(label="Message")
     send_btn = gr.Button("Send")
+    mic = gr.Audio(sources=["microphone"], type="filepath", label="Talk")
+    voice_btn = gr.Button("Send Voice")
+    audio_out = gr.HTML()
+    conversation_state = gr.State(value=None)
+    send_btn.click(chat_send, [msg, chatbox, conversation_state], [chatbox, conversation_state, audio_out])
+    voice_btn.click(voice_send, [mic, chatbox, conversation_state], [chatbox, conversation_state, audio_out])
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)