tomas.helmfridsson committed on
Commit
fb858f0
·
1 Parent(s): 0f86498

updates for working

Browse files
Files changed (2) hide show
  1. app.py +57 -20
  2. requirements.txt +2 -2
app.py CHANGED
@@ -7,65 +7,102 @@ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
7
  from langchain_huggingface.llms import HuggingFacePipeline
8
  from langchain.chains import RetrievalQA
9
 
10
- # ── 1) Ladda & indexera PDF:er ────────────────────────────────
11
  docs, files = [], []
12
  for fn in os.listdir("document"):
13
  if fn.lower().endswith(".pdf"):
14
- loader = PyPDFLoader(os.path.join("document", fn))
 
15
  docs.extend(loader.load_and_split())
16
  files.append(fn)
17
 
 
18
  emb = HuggingFaceEmbeddings(model_name="KBLab/sentence-bert-swedish-cased")
19
  vs = FAISS.from_documents(docs, emb)
20
 
21
- # ── 2) Initiera LLM & RAG-kedja ──────────────────────────────
22
- pipe = pipeline("text-generation", model="tiiuae/falcon-rw-1b", device=-1)
23
- llm = HuggingFacePipeline(
 
 
 
 
 
24
  pipeline=pipe,
25
- model_kwargs={"temperature": 0.3, "max_new_tokens": 512}
26
  )
27
- qa = RetrievalQA.from_chain_type(llm=llm, retriever=vs.as_retriever())
28
 
29
- # ── 3) Chat-funktion ───────────────────────────────────────────
30
  def chat_fn(message, temperature, history):
 
 
 
 
 
31
  history = history or []
32
- if not message:
33
- history.append(("","⚠️ Du måste skriva en fråga."))
 
 
34
  return history, history
 
 
 
 
 
35
  if len(message) > 1000:
36
- history.append((message, f"⚠️ För lång fråga ({len(message)} tecken)."))
 
 
 
37
  return history, history
38
 
 
39
  llm.model_kwargs["temperature"] = temperature
 
 
40
  try:
41
- answer = qa.invoke({"query": message})["result"]
 
42
  except Exception as e:
43
- answer = f"❌ Ett fel uppstod: {e}"
44
- history.append((message, answer))
 
 
45
  return history, history
46
 
47
- # ── 4) Bygg Gradio-UI ──────────────────────────────────────────
48
  with gr.Blocks() as demo:
49
  gr.Markdown("## 🌟 Dokumentassistent (Svenska)")
50
-
51
  gr.Markdown(
52
  "**✅ Laddade PDF-filer:**\n\n" +
53
  "\n".join(f"- {f}" for f in files)
54
  )
55
 
56
  with gr.Row():
57
- txt = gr.Textbox(label="Din fråga:", placeholder="Ex: Vad handlar dokumentet om?")
58
- temp = gr.Slider(0.0, 1.0, value=0.3, step=0.05, label="Temperatur")
 
 
 
 
 
 
 
59
  send = gr.Button("Skicka")
60
 
61
- # Här lagras och visas historiken
62
  chat_state = gr.State([])
63
  chatbot = gr.Chatbot(value=[], type="messages")
64
 
 
65
  send.click(
66
  fn=chat_fn,
67
  inputs=[txt, temp, chat_state],
68
  outputs=[chatbot, chat_state]
69
  )
70
 
71
- demo.launch()
 
 
 
7
  from langchain_huggingface.llms import HuggingFacePipeline
8
  from langchain.chains import RetrievalQA
9
 
10
+ # ── 1) Ladda & indexera alla PDF:er i mappen "document/" ─────────────────────
11
  docs, files = [], []
12
  for fn in os.listdir("document"):
13
  if fn.lower().endswith(".pdf"):
14
+ path = os.path.join("document", fn)
15
+ loader = PyPDFLoader(path)
16
  docs.extend(loader.load_and_split())
17
  files.append(fn)
18
 
19
+ # ── 2) Skapa embedding + FAISS-vektorstore ──────────────────────────────────
20
  emb = HuggingFaceEmbeddings(model_name="KBLab/sentence-bert-swedish-cased")
21
  vs = FAISS.from_documents(docs, emb)
22
 
23
+ # ── 3) Initiera LLM och RetrievalQA-kedja ──────────────────────────────────
24
+ pipe = pipeline(
25
+ "text-generation",
26
+ model="tiiuae/falcon-rw-1b",
27
+ device=-1,
28
+ max_new_tokens=128 # kortare svar för snabbare inferens
29
+ )
30
+ llm = HuggingFacePipeline(
31
  pipeline=pipe,
32
+ model_kwargs={"temperature": 0.3}
33
  )
34
+ qa = RetrievalQA.from_chain_type(llm=llm, retriever=vs.as_retriever())
35
 
36
+ # ── 4) Chat-funktion som använder "messages"-formatet ────────────────────────
37
  def chat_fn(message, temperature, history):
38
+ """
39
+ - message: str, användarens fråga
40
+ - temperature: float, sampling-temperatur
41
+ - history: list of dicts, tidigare meddelanden i formatet {"role","content"}
42
+ """
43
  history = history or []
44
+
45
+ # Om användaren inte skriver något
46
+ if not message.strip():
47
+ history.append({"role": "assistant", "content": "⚠️ Du måste skriva en fråga."})
48
  return history, history
49
+
50
+ # Lägg in användarens fråga
51
+ history.append({"role": "user", "content": message})
52
+
53
+ # Kortare frågor om de är för långa
54
  if len(message) > 1000:
55
+ history.append({
56
+ "role": "assistant",
57
+ "content": f"⚠️ Frågan är för lång ({len(message)} tecken)."
58
+ })
59
  return history, history
60
 
61
+ # Använd vald temperatur
62
  llm.model_kwargs["temperature"] = temperature
63
+
64
+ # Kör RAG + få svar
65
  try:
66
+ result = qa.invoke({"query": message})
67
+ svar = result["result"]
68
  except Exception as e:
69
+ svar = f"❌ Ett fel uppstod vid bearbetning: {e}"
70
+
71
+ # Lägg till svaret
72
+ history.append({"role": "assistant", "content": svar})
73
  return history, history
74
 
75
+ # ── 5) Bygg Gradio‐UI ─────────────────────────────────────────────────────────
76
  with gr.Blocks() as demo:
77
  gr.Markdown("## 🌟 Dokumentassistent (Svenska)")
 
78
  gr.Markdown(
79
  "**✅ Laddade PDF-filer:**\n\n" +
80
  "\n".join(f"- {f}" for f in files)
81
  )
82
 
83
  with gr.Row():
84
+ txt = gr.Textbox(
85
+ lines=2,
86
+ label="Din fråga:",
87
+ placeholder="Exempel: Vad säger dokumentet om avsnittet 'Resultat'?"
88
+ )
89
+ temp = gr.Slider(
90
+ 0.0, 1.0, value=0.3, step=0.05,
91
+ label="Temperatur"
92
+ )
93
  send = gr.Button("Skicka")
94
 
95
+ # Intern state och chatbot-komponent som visar listor av dicts
96
  chat_state = gr.State([])
97
  chatbot = gr.Chatbot(value=[], type="messages")
98
 
99
+ # Bind knappen så att gradio genererar /api/predict-endpoint
100
  send.click(
101
  fn=chat_fn,
102
  inputs=[txt, temp, chat_state],
103
  outputs=[chatbot, chat_state]
104
  )
105
 
106
+ # Starta appen
107
+ if __name__ == "__main__":
108
+ demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  huggingface_hub==0.25.2
2
- gradio
3
  langchain[all]>=0.1.14
4
  langchain-community>=0.0.19
5
  langchain-huggingface>=0.0.6
@@ -8,6 +7,7 @@ sentence-transformers
8
  faiss-cpu
9
  pdfminer.six
10
  pypdf
11
- google-genai==1.5.0
12
  pydantic==2.10.6
13
  python-dotenv
 
 
1
  huggingface_hub==0.25.2
 
2
  langchain[all]>=0.1.14
3
  langchain-community>=0.0.19
4
  langchain-huggingface>=0.0.6
 
7
  faiss-cpu
8
  pdfminer.six
9
  pypdf
10
+ #google-genai==1.5.0
11
  pydantic==2.10.6
12
  python-dotenv
13
+ gradio==5.6.0