jblast94 committed on
Commit
f60bb93
·
verified ·
1 Parent(s): 6310325

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -148
app.py CHANGED
@@ -2,163 +2,62 @@ import gradio as gr
2
  import os
3
  import requests
4
  import base64
5
- from typing import List, Optional, Tuple, Any
6
 
7
- # Environment-driven configuration (HF + local dev)
8
- HF_ORCHESTRATOR_URL = os.getenv("HF_ORCHESTRATOR_URL")
9
- HF_ORCHESTRATOR_WEBHOOK_SECRET = os.getenv("HF_ORCHESTRATOR_WEBHOOK_SECRET")
10
-
11
- # Optional for future Supabase direct reads (allowed: anon key only)
12
- HF_SUPABASE_URL = os.getenv("HF_SUPABASE_URL")
13
- HF_SUPABASE_ANON_KEY = os.getenv("HF_SUPABASE_ANON_KEY")
14
-
15
- # Local dev fallback so this app works outside HF:
16
- # You can run a compatible orchestrator on localhost and point here.
17
- LOCAL_ORCHESTRATOR_FALLBACK = os.getenv("LOCAL_ORCHESTRATOR_FALLBACK", "http://localhost:5678/webhook/voice-agent")
18
-
19
- def _get_orchestrator_url() -> str:
20
- """
21
- Resolve orchestrator URL.
22
-
23
- Precedence:
24
- - HF_ORCHESTRATOR_URL (HF Space / production)
25
- - LOCAL_ORCHESTRATOR_FALLBACK (local dev)
26
- """
27
- return HF_ORCHESTRATOR_URL or LOCAL_ORCHESTRATOR_FALLBACK
28
-
29
- def _post_orchestrator(payload: dict) -> dict:
30
- url = _get_orchestrator_url()
31
- try:
32
- resp = requests.post(url, json=payload, timeout=60)
33
- resp.raise_for_status()
34
- return resp.json()
35
- except requests.exceptions.HTTPError as e:
36
- # Surface orchestrator error payload when available
37
- try:
38
- data = resp.json()
39
- except Exception:
40
- data = {"error": resp.text}
41
- return {"error": f"orchestrator_http_error", "details": str(e), "payload": data}
42
- except Exception as e:
43
- return {"error": "orchestrator_unreachable", "details": str(e)}
44
-
45
- def _build_payload(
46
- conversation_id: Optional[str],
47
- audio_b64: Optional[str],
48
- text: Optional[str],
49
- ) -> dict:
50
- return {
51
- "secret": HF_ORCHESTRATOR_WEBHOOK_SECRET,
52
- "conversation_id": conversation_id,
53
- "audio": audio_b64,
54
- "text": text,
55
- }
56
-
57
- def chat(message: str, history: List[List[str]], state: dict) -> Tuple[List[List[str]], dict]:
58
- """
59
- Text-only interaction with orchestrator.
60
-
61
- - Uses orchestrator contract defined in docs/voice-agent-mcp-architecture.md.
62
- - Threads conversation_id from previous turns via state.
63
- """
64
- conversation_id = state.get("conversation_id")
65
- payload = _build_payload(conversation_id=conversation_id, audio_b64=None, text=message)
66
- data = _post_orchestrator(payload)
67
-
68
- if "error" in data:
69
- history = history + [[message, f"Error: {data.get('error')}"]]
70
- return history, state
71
-
72
- # Update conversation_id for continuity
73
- new_conversation_id = data.get("conversation_id") or conversation_id
74
- state["conversation_id"] = new_conversation_id
75
-
76
- transcript = data.get("transcript_text") or ""
77
  assistant = data.get("assistant_response_text") or ""
78
- reply = assistant or "(no response)"
79
-
80
- if transcript:
81
- # show both transcript (if any) and assistant message
82
- history = history + [[f"You: {message}", f"ASR: {transcript}\n\nAssistant: {reply}"]]
83
- else:
84
- history = history + [[message, reply]]
85
-
86
- return history, state
87
-
88
- def voice(audio_path: str, history: List[List[str]], state: dict) -> Tuple[List[List[str]], Any, dict]:
89
- """
90
- Voice interaction:
91
-
92
- - Reads recorded audio file.
93
- - Encodes to base64 and sends as `audio` to orchestrator.
94
- - Threads conversation_id via state.
95
- - Expects orchestrator to handle STT, LLM, TTS, and return contract fields.
96
- """
97
- conversation_id = state.get("conversation_id")
98
 
 
99
  audio_b64 = None
100
  if audio_path:
101
- try:
102
- with open(audio_path, "rb") as f:
103
- audio_b64 = base64.b64encode(f.read()).decode("utf-8")
104
- except Exception as e:
105
- history = history + [["", f"Error reading audio: {e}"]]
106
- return history, None, state
107
-
108
- if not audio_b64:
109
- history = history + [["", "Error: No audio captured"]]
110
- return history, None, state
111
-
112
- payload = _build_payload(conversation_id=conversation_id, audio_b64=audio_b64, text=None)
113
- data = _post_orchestrator(payload)
114
-
115
- if "error" in data:
116
- history = history + [["", f"Error: {data.get('error')}"]]
117
- return history, None, state
118
-
119
- # Update conversation_id
120
- new_conversation_id = data.get("conversation_id") or conversation_id
121
- state["conversation_id"] = new_conversation_id
122
-
123
- transcript = data.get("transcript_text") or ""
124
  assistant = data.get("assistant_response_text") or ""
125
- reply = assistant or "(no response)"
126
-
127
- # Append transcript + assistant message
128
- user_side = f"(voice) {transcript}" if transcript else "(voice input)"
129
- history = history + [[user_side, reply]]
130
-
131
- # Orchestrator may provide tts_audio_url (URL or data URI) which HF/Gradio can play if wired.
132
- # For now we just ignore it in UI; audio playback can be handled by a custom component.
133
- return history, None, state
134
 
135
  with gr.Blocks() as demo:
136
- gr.Markdown("# Voice Agent (HF Space ↔ n8n Orchestrator)")
137
-
138
- # Shared state for conversation_id and any future metadata
139
- state = gr.State({"conversation_id": None})
140
-
141
- chatbox = gr.Chatbot(label="Conversation")
142
- msg = gr.Textbox(label="Type a message")
143
  send_btn = gr.Button("Send")
144
-
145
- audio_in = gr.Audio(sources=["microphone"], type="filepath", label="Talk")
146
- audio_btn = gr.Button("Send Voice")
147
-
148
- # Text chat wired to orchestrator
149
- send_btn.click(
150
- fn=chat,
151
- inputs=[msg, chatbox, state],
152
- outputs=[chatbox, state],
153
- )
154
-
155
- # Voice chat wired to orchestrator
156
- audio_btn.click(
157
- fn=voice,
158
- inputs=[audio_in, chatbox, state],
159
- outputs=[chatbox, audio_in, state],
160
- )
161
 
162
  if __name__ == "__main__":
163
- # Works for both local dev and HF Space
164
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
2
  import os
3
  import requests
4
  import base64
 
5
 
6
+ ORCHESTRATOR_URL = os.getenv("HF_ORCHESTRATOR_URL", "")
7
+ WEBHOOK_SECRET = os.getenv("HF_ORCHESTRATOR_WEBHOOK_SECRET", "")
8
+ BASIC_USER = os.getenv("N8N_BASIC_AUTH_USER", "")
9
+ BASIC_PASS = os.getenv("N8N_BASIC_AUTH_PASSWORD", "")
10
+
11
+ def _headers():
12
+ headers = {"x-webhook-secret": WEBHOOK_SECRET, "Content-Type": "application/json"}
13
+ if BASIC_USER and BASIC_PASS:
14
+ import base64 as b64
15
+ token = b64.b64encode(f"{BASIC_USER}:{BASIC_PASS}".encode()).decode()
16
+ headers["Authorization"] = f"Basic {token}"
17
+ return headers
18
+
19
def chat_send(message, history, conversation_id):
    """Send a text turn to the orchestrator and append the reply.

    Args:
        message: The user's typed text.
        history: Chatbot history as a list of [user, assistant] pairs.
        conversation_id: Current conversation id, or None to start one.

    Returns:
        Tuple of (updated history, conversation id, HTML audio snippet or "").
    """
    # The secret travels both in the payload and in the headers so either
    # validation strategy on the orchestrator side works.
    payload = {"secret": WEBHOOK_SECRET, "conversation_id": conversation_id, "text": message}
    try:
        # Bounded timeout so a stalled orchestrator cannot hang the UI thread.
        r = requests.post(
            f"{ORCHESTRATOR_URL}/voice-agent",
            json=payload,
            headers=_headers(),
            timeout=60,
        )
    except requests.exceptions.RequestException as e:
        return history + [[message, f"Error: orchestrator unreachable ({e})"]], conversation_id, ""
    try:
        data = r.json()
    except ValueError:
        # Non-JSON body (e.g. an HTML error page from a proxy/gateway).
        return history + [[message, f"Error: invalid response (HTTP {r.status_code})"]], conversation_id, ""
    if r.status_code != 200 or data.get("error"):
        return history + [[message, data.get("error", "Error")]], conversation_id, ""
    assistant = data.get("assistant_response_text") or ""
    # Thread the orchestrator-assigned conversation id across turns.
    convo = data.get("conversation_id") or conversation_id
    audio_html = ""
    if data.get("tts_audio_url"):
        # Inline player for the orchestrator-provided TTS audio URL.
        audio_html = f"<audio controls src=\"{data['tts_audio_url']}\"></audio>"
    return history + [[message, assistant]], convo, audio_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
def voice_send(audio_path, history, conversation_id):
    """Send a recorded voice turn to the orchestrator and append the exchange.

    Args:
        audio_path: Filesystem path of the recorded clip, or None/"" if
            nothing was captured.
        history: Chatbot history as a list of [user, assistant] pairs.
        conversation_id: Current conversation id, or None to start one.

    Returns:
        Tuple of (updated history, conversation id, HTML audio snippet or "").
    """
    if not audio_path:
        # Surface the problem instead of posting {"audio": None} to the backend.
        return history + [["", "Error: no audio captured"]], conversation_id, ""
    try:
        with open(audio_path, "rb") as f:
            audio_b64 = base64.b64encode(f.read()).decode("utf-8")
    except OSError as e:
        return history + [["", f"Error reading audio: {e}"]], conversation_id, ""
    payload = {"secret": WEBHOOK_SECRET, "conversation_id": conversation_id, "audio": audio_b64}
    try:
        # Bounded timeout so a stalled orchestrator cannot hang the UI thread.
        r = requests.post(
            f"{ORCHESTRATOR_URL}/voice-agent",
            json=payload,
            headers=_headers(),
            timeout=60,
        )
    except requests.exceptions.RequestException as e:
        return history + [["", f"Error: orchestrator unreachable ({e})"]], conversation_id, ""
    try:
        data = r.json()
    except ValueError:
        # Non-JSON body (e.g. an HTML error page from a proxy/gateway).
        return history + [["", f"Error: invalid response (HTTP {r.status_code})"]], conversation_id, ""
    if r.status_code != 200 or data.get("error"):
        return history + [["", data.get("error", "Error")]], conversation_id, ""
    # Show the server-side transcript as the "user" side of the exchange.
    user_text = data.get("transcript_text", "")
    assistant = data.get("assistant_response_text") or ""
    convo = data.get("conversation_id") or conversation_id
    audio_html = ""
    if data.get("tts_audio_url"):
        audio_html = f"<audio controls src=\"{data['tts_audio_url']}\"></audio>"
    return history + [[user_text, assistant]], convo, audio_html
 
 
 
 
49
 
50
with gr.Blocks() as demo:
    gr.Markdown("# Voice Agent")
    # Conversation transcript plus inputs for both text and voice turns.
    chatbox = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    send_btn = gr.Button("Send")
    mic = gr.Audio(sources=["microphone"], type="filepath", label="Talk")
    voice_btn = gr.Button("Send Voice")
    # Raw HTML slot used to render an <audio> player for TTS replies.
    audio_out = gr.HTML()
    # Carries the orchestrator's conversation_id across turns.
    conversation_state = gr.State(value=None)
    # Both buttons share the same state/output wiring; only the handler
    # and the input component differ.
    send_btn.click(chat_send, [msg, chatbox, conversation_state], [chatbox, conversation_state, audio_out])
    voice_btn.click(voice_send, [mic, chatbox, conversation_state], [chatbox, conversation_state, audio_out])
 
 
 
 
 
 
 
 
 
 
 
61
 
62
if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard binding for a Hugging Face Space;
    # it also works for local development.
    demo.launch(server_name="0.0.0.0", server_port=7860)