import json
import os
import re

import numpy as np
import pandas as pd

from transformers import pipeline
from sentence_transformers import SentenceTransformer
import gradio as gr
import torch
from huggingface_hub import login

# Sanity Check
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login
login(token=hf_token)
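# login() authenticates this process against the Hub; google/gemma-3-4b-it is
# a gated model, so a token with the license accepted is needed to download it.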

# --- Configuration ---
print("Loading RAG system...")

# Knowledge base paths
FILE_PATH = "data.jsonl"  # raw source file (not used on this path)
PRELOAD_FILE_PATH = "preload-data.json"

# Load the pre-chunked data
print(f"Using preloaded data from {PRELOAD_FILE_PATH}...")
with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

# The knowledge base is a flat list of document strings
documents = data

# Embeddings
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)
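# all-MiniLM-L6-v2 produces 384-dimensional sentence embeddings, so
# `embeddings` is a float array of shape (len(documents), 384).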

# Use pandas dataframe
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # store as list
    }
)
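
# Each row pairs one knowledge-base chunk with its embedding; a "Similarity"
# column is added per query in retrieve_with_pandas() below.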

# Load LLM Pipeline
llm = pipeline(
    "text-generation",
    model="google/gemma-3-4b-it",  # note: a 4B model may exceed a small Space's storage
    token=hf_token,
)
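
# Sketch (assumes a GPU Space, which this code does not require): loading the
# model in bfloat16 on the accelerator roughly halves memory versus CPU float32:
#   llm = pipeline("text-generation", model="google/gemma-3-4b-it",
#                  token=hf_token, device_map="auto", torch_dtype=torch.bfloat16)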

def clean_query_with_llm(query):
    prompt_content = f"""
    Below is a new question asked by the user that needs to be answered by searching a knowledge base.
    You have access to an SFU IT Knowledge Base index with hundreds of chunked documents.
    Generate a search query based on the user's question.
    If you cannot generate a search query, return just the number 0.
    User's Question:
    {query}
    Search Query:
    """

    response = llm(
        prompt_content,
        max_new_tokens=100,
        do_sample=False,
        return_full_text=False
    )
    return response[0]["generated_text"].strip()
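
# Illustrative example: a conversational question is rewritten into a
# retrieval-friendly query before searching, e.g.
#   clean_query_with_llm("hey, i can't log into gosfu, help??")
# might return something like "goSFU login problems" (exact wording depends
# on the model).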
    

# Retrieve w Pandas
def retrieve_with_pandas(query: str, top_k: int = 5):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    df["Similarity"] = df["Embedding"].apply(cosine_sim)
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
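
# Sketch: the row-by-row .apply() above is fine for a few hundred chunks; the
# same scores also come from one matrix-vector product after unit-normalising
# both sides, e.g.:
#   mat = np.vstack(df["Embedding"])
#   mat = mat / np.linalg.norm(mat, axis=1, keepdims=True)
#   q = query_embedding / np.linalg.norm(query_embedding)
#   scores = mat @ q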


def generate_with_rag(query, top_k=5):
    # goSFU-specific cleaning: normalise any casing of "gosfu" to "goSFU"
    query = re.sub("gosfu", "goSFU", query, flags=re.IGNORECASE)

    # Retrieve: rewrite the question into a search query, falling back to the
    # raw question if the LLM returned the "0" sentinel
    search_query = clean_query_with_llm(query)
    if search_query.strip() == "0":
        search_query = query
    results = retrieve_with_pandas(search_query, top_k=top_k)

    # Turn the Series into a single string of text
    # (each doc separated by a divider)
    context_str = "\n\n---\n\n".join(results["Document"].tolist())

    # Build a clean prompt
    prompt_content = f"""
    You are an SFU IT helpdesk chatbot.
    Your task is to answer SFU IT related questions, such as how to access various technology services, or general troubleshooting.
    Below is a new question asked by the user, along with article chunks related to that question.
    If the user asked a question, answer it with short step-by-step instructions, considering all the articles below.
    If there are links in the articles, provide those links in your answer.
    If the user asked a question and the answer is not in the contexts, say that you're sorry you can't help them and suggest contacting SFU IT at 778-782-8888 or submitting an inquiry ticket at https://www.sfu.ca/information-systems/get-help.html
    If the user DID NOT ask a question, be friendly and ask how you can help them.
    Do not recommend, suggest, or provide advice on anything that is not related to SFU or SFU IT.
    If the user asked something relating to mental health or is seeking medical advice, redirect them to SFU Health & Counselling at https://www.sfu.ca/students/health.html
    Do not ask the user any follow-up questions after answering.
Question:
{query}
-- Start of Articles --
{context_str}
-- End of Articles --
Answer:"""

    # Call the LLM
    response = llm(
        prompt_content,
        max_new_tokens=500,
        do_sample=False,
        return_full_text=False
    )
    return response[0]["generated_text"].strip()
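
# Illustrative end-to-end call (answer text varies with the model and the
# retrieved chunks):
#   print(generate_with_rag("How do I connect to eduroam on my laptop?"))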
    

def chat_fn(message, history):
    """
    Gradio ChatInterface callback. `history` is unused: each message is
    answered independently against the knowledge base.
    """
    answer = generate_with_rag(message, top_k=5)
    return answer


demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

# Pass share=True to demo.launch() to expose a public link
if __name__ == "__main__":
    demo.launch()