Spaces:

Ahmad-01
/

Health_Care

Runtime error

App Files Community

Ahmad-01 committed on Oct 21

Commit

a57e772

verified ·

1 Parent(s): dda1c90

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -20

app.py CHANGED Viewed

@@ -22,11 +22,9 @@ def extract_docs(ds):
     """Extract clean text documents safely from the PubMedQA dataset."""
     docs = []
     for e in ds:
-        # Case 1: if entry is a dictionary
         if isinstance(e, dict):
             ctx = e.get("context", "")
             if isinstance(ctx, dict):
-                # Nested dict with list of contexts
                 text = ctx.get("contexts", [""])
                 if isinstance(text, list):
                     docs.append(" ".join(map(str, text)))
@@ -34,41 +32,39 @@ def extract_docs(ds):
                     docs.append(str(text))
             else:
                 docs.append(str(ctx))
-        # Case 2: if entry is already a string
         elif isinstance(e, str):
             docs.append(e)
         else:
             docs.append(str(e))
     return docs
-# Extract a small subset for demo (fast loading)
 documents = extract_docs(dataset["train"][:500])
-print(f"✅ Loaded {len(documents)} documents.")
 # ------------------------------
-# Step 2. Build embeddings
 # ------------------------------
-print("🔍 Building embeddings...")
-embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 embeddings = embed_model.encode(documents, show_progress_bar=True)
 embeddings = np.array(embeddings).astype("float32")
 index = faiss.IndexFlatL2(embeddings.shape[1])
 index.add(embeddings)
-print("✅ FAISS index built.")
 # ------------------------------
-# Step 3. Load generation model
 # ------------------------------
-print("⚙️ Loading text generation model...")
-tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
-gen_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
 # ------------------------------
-# Step 4. Define RAG answer function
 # ------------------------------
 def rag_answer(question, k=3, max_new_tokens=256):
-    """Retrieve top-k relevant chunks and generate an answer."""
     if not question.strip():
         return "Please enter a question.", ""
@@ -92,18 +88,18 @@ def ask(question, k, max_tokens):
     answer, sources = rag_answer(question, k, max_tokens)
     return answer, sources
-with gr.Blocks(title="🏥 MedQuery AI — Healthcare Knowledge Assistant") as demo:
     gr.Markdown(
         """
-        # 🏥 MedQuery AI — Healthcare Knowledge Assistant
-        Ask any **clinical or biomedical question**, and the app retrieves relevant PubMed data
-        and generates concise, evidence-based answers using Retrieval-Augmented Generation (RAG).
         """
     )
     with gr.Row():
         question = gr.Textbox(
-            label="Ask a medical question",
             placeholder="e.g. What are the diagnostic criteria for hypertension?"
         )
     with gr.Row():

     """Extract clean text documents safely from the PubMedQA dataset."""
     docs = []
     for e in ds:
         if isinstance(e, dict):
             ctx = e.get("context", "")
             if isinstance(ctx, dict):
                 text = ctx.get("contexts", [""])
                 if isinstance(text, list):
                     docs.append(" ".join(map(str, text)))
                     docs.append(str(text))
             else:
                 docs.append(str(ctx))
         elif isinstance(e, str):
             docs.append(e)
         else:
             docs.append(str(e))
     return docs
 documents = extract_docs(dataset["train"][:500])
+print(f"✅ Loaded {len(documents)} biomedical documents.")
 # ------------------------------
+# Step 2. Build embeddings (Biomedical)
 # ------------------------------
+print("🔍 Building biomedical embeddings...")
+embed_model = SentenceTransformer("pritamdeka/S-PubMedBert-MS-MARCO")
 embeddings = embed_model.encode(documents, show_progress_bar=True)
 embeddings = np.array(embeddings).astype("float32")
 index = faiss.IndexFlatL2(embeddings.shape[1])
 index.add(embeddings)
+print("✅ FAISS index built with biomedical embeddings.")
 # ------------------------------
+# Step 3. Load biomedical generation model
 # ------------------------------
+print("⚙️ Loading biomedical text generation model...")
+tokenizer = AutoTokenizer.from_pretrained("allenai/biomed-flan-t5-base")
+gen_model = AutoModelForSeq2SeqLM.from_pretrained("allenai/biomed-flan-t5-base")
 # ------------------------------
+# Step 4. Define RAG function
 # ------------------------------
 def rag_answer(question, k=3, max_new_tokens=256):
+    """Retrieve top-k relevant biomedical passages and generate an answer."""
     if not question.strip():
         return "Please enter a question.", ""
     answer, sources = rag_answer(question, k, max_tokens)
     return answer, sources
+with gr.Blocks(title="🏥 MedQuery AI — Biomedical RAG Assistant") as demo:
     gr.Markdown(
         """
+        # 🏥 MedQuery AI — Biomedical Knowledge Assistant
+        This app retrieves relevant PubMed-style passages and generates concise,
+        **evidence-based biomedical answers** using Retrieval-Augmented Generation (RAG).
         """
     )
     with gr.Row():
         question = gr.Textbox(
+            label="Ask a biomedical or clinical question",
             placeholder="e.g. What are the diagnostic criteria for hypertension?"
         )
     with gr.Row():