import json
import numpy as np
import pandas as pd
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import gradio as gr
import torch
from huggingface_hub import login
import os
import re
# Sanity Check
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")
# Explicit login
login(token=hf_token)
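# Note: on Hugging Face Spaces, V2_TOKEN would typically be configured as a
# Space secret (Settings -> Variables and secrets) so it is available here
# via os.getenv at startup.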
# --- Configuration ---
print("Loading RAG system...")

# Knowledge base paths
FILE_PATH = "data.jsonl"  # raw knowledge base (currently unused; the preload file below is used instead)
PRELOAD_FILE_PATH = "preload-data.json"

# Load data (assumes the preloaded JSON file exists in the Space)
print(f"Loading preloaded data from {PRELOAD_FILE_PATH}...")
with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)
# The knowledge base is a list of pre-chunked article strings
documents = data
# Embeddings
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)
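# Optional (assumption, not part of the original flow): L2-normalising the
# stored vectors once up front would let cosine similarity later reduce to a
# plain dot product:
#
#     embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)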
# Use pandas dataframe
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # store each document's vector as a list entry
    }
)
# Load LLM Pipeline
llm = pipeline(
    "text-generation",
    model="google/gemma-3-4b-it",  # a 4B model; may not fit the storage of a small Space
    token=hf_token,
)
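# Sketch (assumption, not in the original code): if the Space runs out of
# memory, loading the weights in bfloat16 roughly halves the model footprint:
#
#     llm = pipeline(
#         "text-generation",
#         model="google/gemma-3-4b-it",
#         token=hf_token,
#         torch_dtype=torch.bfloat16,
#     )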
def clean_query_with_llm(query):
    """Ask the LLM to rewrite the user's question as a knowledge-base search query."""
    prompt_content = f"""
Below is a new question asked by the user that needs to be answered by searching in a knowledge base.
You have access to an SFU IT Knowledge Base index with hundreds of chunked documents.
Generate a search query based on the user's question.
If you cannot generate a search query, return just the number 0.
User's Question:
{query}
Search Query:
"""
    response = llm(
        prompt_content,
        max_new_tokens=100,
        do_sample=False,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
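# Illustrative only -- the actual rewrite depends on the model:
# clean_query_with_llm("i cant log into gosfu") might return
# something like "goSFU login troubleshooting".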
# Retrieval with pandas
def retrieve_with_pandas(query: str, top_k: int = 5):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    df["Similarity"] = df["Embedding"].apply(cosine_sim)
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
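# A vectorised alternative (sketch, assuming the same `df` and
# `embedding_model` globals as above): stacking the stored vectors into one
# matrix replaces the per-row Python call that .apply() incurs with a single
# matrix-vector product.
def retrieve_vectorized(query: str, top_k: int = 5):
    q = embedding_model.encode([query], convert_to_numpy=True)[0]
    mat = np.vstack(df["Embedding"].to_numpy())  # shape: (n_docs, dim)
    sims = mat @ q / (np.linalg.norm(mat, axis=1) * np.linalg.norm(q))
    top = np.argsort(sims)[::-1][:top_k]
    out = df.iloc[top][["Document"]].copy()
    out["Similarity"] = sims[top]
    return out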
def generate_with_rag(query, top_k=5):
    # goSFU-specific cleanup: normalise the product name's casing
    if "gosfu" in query.lower():
        query = re.sub("gosfu", "goSFU", query, flags=re.IGNORECASE)
    # Retrieve (fall back to the raw query if the LLM returned the "0" sentinel)
    search_query = clean_query_with_llm(query)
    if search_query.strip() == "0":
        search_query = query
    results = retrieve_with_pandas(search_query, top_k=top_k)
    # Join the retrieved documents into a single context string
    # (each doc separated by a divider)
    context_str = "\n\n---\n\n".join(results["Document"].tolist())
    # Build a clean prompt
    prompt_content = f"""
You are an SFU IT helpdesk chatbot.
Your task is to answer SFU IT-related questions, such as accessing various technology services or general troubleshooting.
Below is a new question asked by the user, along with article chunks related to that question.
If the user asked a question, answer it with short step-by-step instructions; consider all the articles below.
If there are links in the articles, provide those links in your answer.
If the user asked a question and the answer is not in the contexts, say that you're sorry that you can't help them and suggest contacting SFU IT at 778-782-8888 or by submitting an inquiry ticket at https://www.sfu.ca/information-systems/get-help.html
If the user DID NOT ask a question, be friendly and ask how you can help them.
Do not recommend, suggest, or provide any advice on anything that is not related to SFU or SFU IT.
If the user asked something relating to mental health or is seeking medical advice, redirect them to SFU Health & Counselling at https://www.sfu.ca/students/health.html
Do not ask the user any follow-up questions after answering them.
Question:
{query}
-- Start of Articles --
{context_str}
-- End of Articles --
Answer:"""
    # Call the LLM
    response = llm(
        prompt_content,
        max_new_tokens=500,
        do_sample=False,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
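# Illustrative only (the answer depends on the model and the retrieved chunks):
# print(generate_with_rag("How do I connect to eduroam on my laptop?"))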
def chat_fn(message, history):
    """
    Chat interface callback. `history` is supplied by gr.ChatInterface but is
    unused here: every turn is answered independently.
    """
    answer = generate_with_rag(message, top_k=5)
    return answer
demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)
# Pass share=True to demo.launch() for a public link when running locally
if __name__ == "__main__":
    demo.launch()