bhardwaj08sarthak committed
Commit e038f18 · verified
1 Parent(s): 54c3df6

Update app.py

Files changed (1)
  1. app.py +39 -34
app.py CHANGED
@@ -2,6 +2,8 @@
 # It expects `level_classifier_tool.py` to be colocated (or installed on PYTHONPATH).
 import sys
 import os
+from huggingface_hub import hf_hub_download
+import pickle
 from huggingface_hub import login
 login(os.getenv("HF_Token"))
 import json
@@ -23,43 +25,46 @@ from all_tools import classify_and_score, QuestionRetrieverTool
 from phrases import BLOOMS_PHRASES, DOK_PHRASES
 import spaces
 # Prebuild embeddings once
-_backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
+_backend = HFEmbeddingBackend(model_name="google/embeddinggemma-300m")
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
 
-D = {
-    "GSM8k": GSM8k['question'],
-    "Olympiad": Olympiad_math['question'],
-    "Olympiad2": Olympiad_math2['question'],
-    "DeepMind Math": clean_math['question'],
-    "MMMLU": MMMLU['question'],
-    "MMMU": MMMU['question'],
-    "ScienceQA": ScienceQA['question'],
-    "PubmedQA": PubmedQA['question']
-}
-all_questions = (
-    list(D["GSM8k"]) +
-    list(D["Olympiad"]) +
-    list(D["MMMLU"]) +
-    list(D["MMMU"]) +
-    list(D["DeepMind Math"]) +
-    list(D["Olympiad2"]) +
-    list(D["ScienceQA"]) +
-    list(D["PubmedQA"])
-)
-texts = all_questions
-@spaces.GPU(15)
-def build_indexes_on_gpu(model="google/embeddinggemma-300m"):
-    device = 'cuda'
-    emb = HuggingFaceEmbeddings(
-        model_name="model",
-        model_kwargs={"device": device},
-        encode_kwargs={"normalize_embeddings": True})
-    idx = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
-    return idx
-device = "cuda"
-
-index = build_indexes_on_gpu(model="google/embeddinggemma-300m")
+file_path = hf_hub_download("bhardwaj08sarthak/stem_questioin_embeddings", "index.pkl")
+with open(file_path, "rb") as f:
+    index = pickle.load(f)
+#D = {
+#    "GSM8k": GSM8k['question'],
+#    "Olympiad": Olympiad_math['question'],
+#    "Olympiad2": Olympiad_math2['question'],
+#    "DeepMind Math": clean_math['question'],
+#    "MMMLU": MMMLU['question'],
+#    "MMMU": MMMU['question'],
+#    "ScienceQA": ScienceQA['question'],
+#    "PubmedQA": PubmedQA['question']
+#}
+#all_questions = (
+#    list(D["GSM8k"]) +
+#    list(D["Olympiad"]) +
+#    list(D["MMMLU"]) +
+#    list(D["MMMU"]) +
+#    list(D["DeepMind Math"]) +
+#    list(D["Olympiad2"]) +
+#    list(D["ScienceQA"]) +
+#    list(D["PubmedQA"])
+#)
+#texts = all_questions
+#@spaces.GPU(15)
+#def build_indexes_on_gpu(model="google/embeddinggemma-300m"):
+#    device = 'cuda'
+#    emb = HuggingFaceEmbeddings(
+#        model_name="model",
+#        model_kwargs={"device": device},
+#        encode_kwargs={"normalize_embeddings": True})
+#    idx = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
+#    return idx
+#device = "cuda"
+
+#index = build_indexes_on_gpu(model="google/embeddinggemma-300m")
 # ------------------------ Agent setup with timeout ------------------------
 def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
     client = InferenceClient(
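
For context, the net effect of this diff is to stop rebuilding the question index on a GPU worker at every app launch and instead unpickle a prebuilt index published on the Hub. Below is a minimal standalone sketch of that load path; the repo id and filename come from the diff, while the retrieval call at the end is an assumption based on the commented-out builder, which constructed a llama_index VectorStoreIndex.

# Sketch: load a prebuilt, pickled vector index from the Hub instead of
# re-embedding every question corpus on GPU at startup.
import pickle
from huggingface_hub import hf_hub_download

# Resolves to a local cache path, downloading the file on first use.
file_path = hf_hub_download(
    repo_id="bhardwaj08sarthak/stem_questioin_embeddings",
    filename="index.pkl",
)

# pickle.load can execute code embedded in the file; only unpickle
# artifacts from a repo you control or trust.
with open(file_path, "rb") as f:
    index = pickle.load(f)

# Assumption: the artifact is a llama_index VectorStoreIndex that still
# carries the embed model it was built with, so it can serve retrieval
# directly. The query string is illustrative.
retriever = index.as_retriever(similarity_top_k=5)
hits = retriever.retrieve("A triangle has sides 3, 4, and 5. What is its area?")

The trade-off is startup time for a trust dependency: launches no longer pay for embedding the full corpus, but the app now depends on the integrity of the pickled artifact.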