Update app.py
app.py
CHANGED
@@ -1,160 +1,359 @@
(Note: the deleted side of this diff did not survive the page capture; most removed lines are truncated. The legible fragments show the old, roughly 300-line version built a TF-IDF index at import time and answered by extraction alone, with no LLM. Recoverable deleted lines from this hunk:)

- # ---------- Build TF-IDF index ----------
- print("Loading PDFs and building TF-IDF index...")
- all_texts, all_metas = load_all_pdfs(PDF_DIR)
- corpus_chunks = []
- corpus_metas = []
- for text, meta in zip(all_texts, all_metas):
-     chs = split_text_into_chunks(text)
-     corpus_chunks.extend(chs)
-     corpus_metas.extend([meta] * len(chs))
-     idxs = sims.argsort()[::-1][:top_k]

@@ -168,131 +367,203 @@ def ocr_from_image(img: Image.Image):

(Recoverable deleted lines from the second hunk: the old extractive answer, the old chat handler's textbox-clearing return, and the old launch call:)

-     # Combine top chunks as extractive answer (shorten if too long)
-     answer_parts = []
-     for r in results:
-         txt = r["text"].strip()
-         if len(txt) > 800:
-             txt = txt[:800].rsplit("\n", 1)[0] + "…"
-         answer_parts.append(f"[Source: {r['meta'].get('source','textbook')}] \n{txt}")
-     return "\n\n".join(answer_parts)
-     return chat_history, user_state, ""
- demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
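(For reference, the truncated deletions around the recovered ranking line amount to the standard scikit-learn TF-IDF pattern. A hypothetical sketch only; `vectorizer` and `tfidf_search` are assumed names, not recovered from the diff:)

# Hypothetical reconstruction of the removed TF-IDF retrieval (assumed shape).
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

vectorizer = TfidfVectorizer()
chunk_matrix = vectorizer.fit_transform(corpus_chunks)  # one row per textbook chunk

def tfidf_search(query, top_k=5):
    q_vec = vectorizer.transform([query])
    sims = cosine_similarity(q_vec, chunk_matrix).ravel()
    idxs = sims.argsort()[::-1][:top_k]  # the ranking line recovered above
    return [(corpus_chunks[i], float(sims[i])) for i in idxs]

(The full updated file, reconstructed from the added lines, follows.)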
# app.py
"""
Jajabor – SEBA Assamese Class 10 Tutor
Hugging Face Spaces ready Gradio app (single-file)

This file contains a working, lightweight adaptation of your Colab notebook
so it can run on Hugging Face Spaces (CPU-friendly demo).

IMPORTANT notes for deployment:
- Spaces has limited CPU/GPU. Large models (Qwen2.5, BAAI/bge-m3) won't run
  locally in most Spaces. This app uses smaller models for a working demo.
- For production-quality behavior, switch embeddings/LLM calls to the
  Hugging Face Inference API (use your HF token) or host on Colab/VM with GPU.

Create a `requirements.txt` with these entries (add to your repo):

gradio==4.44.0
pymupdf
sentence-transformers
faiss-cpu
transformers
accelerate
torch
pytesseract
pillow
sympy
huggingface_hub

Place your SEBA Class 10 PDFs in the repository under `pdfs/class10/`.

Usage on Spaces:
- Upload the repo (app.py + requirements.txt + pdfs/class10/*).
- If you want higher-quality LLMs/embeddings, set a repo secret HF_TOKEN
  and configure the INFERENCE_* model names below.
"""

import os
import io
import sqlite3
from datetime import datetime
import threading

import fitz  # PyMuPDF
import numpy as np
from PIL import Image

import gradio as gr
import faiss
import pytesseract
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import sympy as sp
from huggingface_hub import InferenceApi
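
# Deployment note (an assumption, not from the original file): pytesseract only
# wraps the tesseract binary, which pip cannot install; on Spaces, add a
# `packages.txt` file at the repo root listing `tesseract-ocr` (plus language
# data packages for the scripts you want to OCR).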

# ---------------------- Configuration ----------------------
APP_NAME = "Jajabor – SEBA Assamese Class 10 Tutor (Spaces demo)"

BASE_DIR = os.path.abspath(".")
PDF_DIR = os.path.join(BASE_DIR, "pdfs", "class10")
DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")

# Lightweight defaults for Spaces demo. Replace with heavier models via Inference API.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_LOCAL = "sshleifer/tiny-gpt2"  # very small demo model (optional local)

# If you set HF_TOKEN as a repo secret / environment variable, the app will
# use the Inference API models below for better results.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
INFERENCE_EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"  # example
INFERENCE_LLM_MODEL = "bigscience/bloomz-1b1"  # example remote model

CHUNK_SIZE = 600
CHUNK_OVERLAP = 120
TOP_K = 5

# Global variables initialized later
embedding_model = None
index = None
corpus_chunks = []
corpus_metas = []

# If HF_TOKEN provided, create inference clients
inference_embed_client = None
inference_llm_client = None
if HF_TOKEN:
    try:
        inference_embed_client = InferenceApi(repo_id=INFERENCE_EMBED_MODEL, token=HF_TOKEN)
        inference_llm_client = InferenceApi(repo_id=INFERENCE_LLM_MODEL, token=HF_TOKEN)
    except Exception:
        inference_embed_client = None
        inference_llm_client = None

# ---------------------- Database ----------------------

def init_db(db_path=DB_PATH):
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS users (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            username TEXT UNIQUE,
            created_at TEXT
        )
        """
    )
    cur.execute(
        """
        CREATE TABLE IF NOT EXISTS interactions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            user_id INTEGER,
            timestamp TEXT,
            query TEXT,
            answer TEXT,
            is_math INTEGER,
            FOREIGN KEY(user_id) REFERENCES users(id)
        )
        """
    )
    conn.commit()
    conn.close()


def get_or_create_user(username: str):
    username = username.strip()
    if not username:
        return None

    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute("SELECT id FROM users WHERE username=?", (username,))
    row = cur.fetchone()
    if row:
        user_id = row[0]
    else:
        cur.execute(
            "INSERT INTO users (username, created_at) VALUES (?, ?)",
            (username, datetime.utcnow().isoformat()),
        )
        conn.commit()
        user_id = cur.lastrowid
    conn.close()
    return user_id


def log_interaction(user_id, query, answer, is_math: bool):
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute(
        """
        INSERT INTO interactions (user_id, timestamp, query, answer, is_math)
        VALUES (?, ?, ?, ?, ?)
        """,
        (user_id, datetime.utcnow().isoformat(), query, answer, 1 if is_math else 0),
    )
    conn.commit()
    conn.close()


def get_user_stats(user_id):
    conn = sqlite3.connect(DB_PATH)
    cur = conn.cursor()
    cur.execute("SELECT COUNT(*), SUM(is_math) FROM interactions WHERE user_id=?", (user_id,))
    row = cur.fetchone()
    conn.close()
    total = row[0] or 0
    math_count = row[1] or 0
    return total, math_count


init_db()


# ---------------------- PDF loading + RAG ----------------------

def extract_text_from_pdf(pdf_path: str) -> str:
    try:
        doc = fitz.open(pdf_path)
    except Exception:
        return ""
    pages = []
    for page in doc:
        txt = page.get_text("text")
        if txt:
            pages.append(txt)
    return "\n".join(pages)


def load_all_pdfs(pdf_dir: str):
    texts = []
    metas = []
    if not os.path.exists(pdf_dir):
        print("PDF_DIR does not exist:", pdf_dir)
        return texts, metas
    for fname in sorted(os.listdir(pdf_dir)):
        if fname.lower().endswith(".pdf"):
            path = os.path.join(pdf_dir, fname)
            print("Reading:", path)
            text = extract_text_from_pdf(path)
            if text:
                texts.append(text)
                metas.append({"source": fname})
    return texts, metas


def split_text(text: str, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP):
    chunks = []
    start = 0
    L = len(text)
    while start < L:
        end = min(start + chunk_size, L)
        chunk = text[start:end]
        if chunk.strip():
            chunks.append(chunk)
        if end == L:
            break
        start = end - overlap
    return chunks

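# Note on split_text above: with the defaults CHUNK_SIZE=600 and CHUNK_OVERLAP=120,
# consecutive chunks start 480 characters apart, so text near every chunk boundary
# appears in two chunks and cannot be lost to an unlucky split.
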
def build_embedding_index():
    global embedding_model, index, corpus_chunks, corpus_metas

    print("Loading embedding model:", EMBEDDING_MODEL_NAME)
    embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

    all_texts, all_metas = load_all_pdfs(PDF_DIR)
    corpus_chunks = []
    corpus_metas = []
    for text, meta in zip(all_texts, all_metas):
        chs = split_text(text)
        corpus_chunks.extend(chs)
        corpus_metas.extend([meta] * len(chs))

    if not corpus_chunks:
        print("No document chunks found - RAG will be empty.")
        index = None
        return

    print("Encoding", len(corpus_chunks), "chunks...")
    embs = embedding_model.encode(corpus_chunks, batch_size=32, show_progress_bar=False).astype("float32")
    dim = embs.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embs)
    print("FAISS index ready with dim", dim)


# Build the index in a background thread so the Space starts serving quickly;
# until it finishes, rag_search() below returns [] and answers fall back to the bare LLM.
threading.Thread(target=build_embedding_index, daemon=True).start()


def rag_search(query: str, k: int = TOP_K):
    if index is None or embedding_model is None:
        return []
    q_vec = embedding_model.encode([query]).astype("float32")
    D, I = index.search(q_vec, k)
    results = []
    for dist, idx in zip(D[0], I[0]):
        if idx == -1:
            continue
        results.append({
            "score": float(dist),
            "text": corpus_chunks[idx],
            "meta": corpus_metas[idx],
        })
    return results

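# Note: IndexFlatL2 reports squared L2 distances, so a smaller "score" means a
# closer match, and FAISS returns the k hits already sorted best-first.
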
# ---------------------- LLM + RAG prompt building ----------------------

# Try to create a small local LLM pipeline for demo; if not present, fall back to Inference API
local_llm = None
try:
    tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_LOCAL)
    model = AutoModelForCausalLM.from_pretrained(LLM_MODEL_LOCAL)
    local_llm = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.9,
    )
    print("Local tiny LLM loaded for demo.")
except Exception:
    local_llm = None
    print("Local LLM not available; will use Inference API if HF_TOKEN is set.")

SYSTEM_PROMPT = """
You are "Jajabor", an expert SEBA Assamese tutor for Class 10.
Always prefer to answer in Assamese. If the student clearly asks for English, you may reply in English.

Rules:
- Use ONLY the given textbook context when requested.
- If you are not sure, say: "এই প্ৰশ্নটো পাঠ্যপুথিৰ অংশত স্পষ্টকৈ নাই, সেয়েহে মই নিশ্চিত নহয়।"
- বোঝাপৰা সহজ ভাষাত ব্যাখ্যা কৰা, উদাহৰণ দিয়ক।
- If it is a maths question, explain step-by-step clearly.
"""


def build_rag_prompt(context_blocks, question, chat_history):
    ctx = ""
    for i, block in enumerate(context_blocks, start=1):
        src = block["meta"].get("source", "textbook")
        ctx += f"\n[Context {i} – {src}]\n{block['text']}\n"

    hist = ""
    for role, msg in chat_history:
        hist += f"{role}: {msg}\n"

    prompt = f"{SYSTEM_PROMPT}\n\nপূর্বৰ বাৰ্তাসমূহ:\n{hist}\nসদস্যৰ প্ৰশ্ন:\n{question}\n\nসম্পৰ্কিত পাঠ্যপুথিৰ অংশ:\n{ctx}\n\nএতিয়া একেদম সহায়ক আৰু বুজিবলৈ সহজ উত্তৰ দিয়া।"
    return prompt


def llm_answer_with_rag(question: str, chat_history):
    retrieved = rag_search(question, TOP_K)
    prompt = build_rag_prompt(retrieved, question, chat_history)

    # Prefer Inference API if available
    if inference_llm_client is not None:
        try:
            resp = inference_llm_client(inputs=prompt, params={"max_new_tokens": 512})
            # InferenceApi returns a list of dicts for text-generation models,
            # or a dict or plain string for others; handle all three shapes.
            if isinstance(resp, list) and resp and isinstance(resp[0], dict) and "generated_text" in resp[0]:
                out_text = resp[0]["generated_text"]
            elif isinstance(resp, dict) and "generated_text" in resp:
                out_text = resp["generated_text"]
            elif isinstance(resp, str):
                out_text = resp
            else:
                out_text = str(resp)
            # Some remote models echo the prompt; try to strip prompt
            if out_text.startswith(prompt):
                answer = out_text[len(prompt):].strip()
            else:
                answer = out_text.strip()
            return answer
        except Exception:
            pass

    # Fallback to local tiny model
    if local_llm is not None:
        out = local_llm(prompt, num_return_sequences=1)[0]["generated_text"]
        if out.startswith(prompt):
            return out[len(prompt):].strip()
        return out

    # If nothing available, return a safe fallback
    return (
        "দুঃখিত—এই Spaces ইনষ্টলেশ্যনটোৱে প্ৰতিস্থাপন কৰিব পৰা কোনো LLM নাপালে।"
        " যদি আপুনি HF_TOKEN হিচাপে এক্সেস টোকেন যোগ কৰে, মই অনলাইন Inference API ব্যৱহাৰ কৰি উত্তৰ দিম।"
    )


# ---------------------- OCR + math helpers ----------------------

def ocr_from_image(img: Image.Image):
    if img is None:
        return ""
    try:
        img = img.convert("RGB")
    except Exception:
(... new-file lines 360-366 fall between this diff's two hunks and are unchanged, so they are not shown; presumably the pytesseract call that fills `text` ...)

        text = ""
    return text.strip()


def is_likely_math(text: str) -> bool:
    math_chars = set("0123456789+-*/=^()%")
    if any(ch in text for ch in math_chars):
        return True
    kws = ["গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত"]
    return any(k in text for k in kws)


def solve_math_expression(expr: str):
    try:
        expr = expr.replace("^", "**")
        if "=" in expr:
            left, right = expr.split("=", 1)
            left_s = sp.sympify(left)
            right_s = sp.sympify(right)
            eq = sp.Eq(left_s, right_s)
            sol = sp.solve(eq)
            steps = []
            steps.append("প্ৰথমে সমীকৰণ লওঁ:")
            steps.append(f"{sp.pretty(eq)}")
            steps.append("Sympy ৰ সহায়ত সমাধান পোৱা যায়:")
            steps.append(str(sol))
            explanation = "ধাপ-ধাপে সমাধান (সংক্ষেপে):\n" + "\n".join(f"- {s}" for s in steps)
            explanation += f"\n\nসেয়েহে সমাধান: {sol}"
        else:
            expr_s = sp.sympify(expr)
            simp = sp.simplify(expr_s)
            explanation = (
                "প্ৰদত্ত গণিতীয় অভিব্যক্তি:\n"
                f"{expr}\n\nসৰলীকৰণ কৰাৰ পিছত পোৱা যায়:\n{simp}"
            )
        return explanation
    except Exception:
        return (
            "মই সঠিকভাৱে গণিতীয় অভিব্যক্তি চিনাক্ত কৰিব নোৱাৰিলোঁ। "
            "দয়া কৰি সমীকৰণটো অলপ বেছি স্পষ্টকৈ লিখা: উদাহৰণ – 2*x + 3 = 7"
        )
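
# Note: sp.sympify() does not accept implicit multiplication, so "2*x + 3 = 7"
# parses while "2x + 3 = 7" raises SympifyError (hence the example in the error
# message above spells out the "*"); supporting the bare form would need sympy's
# parse_expr with the implicit_multiplication_application transformation.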

# ---------------------- Chat logic ----------------------

def login_user(username, user_state):
    username = (username or "").strip()
    if not username:
        return user_state, "⚠️ অনুগ্ৰহ কৰি প্ৰথমে লগিনৰ বাবে এটা নাম লিখক।"

    user_id = get_or_create_user(username)
    user_state = {"username": username, "user_id": user_id}
    total, math_count = get_user_stats(user_id)

    stats = (
        f"👤 ব্যৱহাৰকাৰী: **{username}**\n\n"
        f"📊 মোট প্ৰশ্ন: **{total}**\n"
        f"🧮 গণিত প্ৰশ্ন: **{math_count}**"
    )
    return user_state, stats


def chat_logic(
    username,
    text_input,
    image_input,
    audio_input,
    chat_history,
    user_state,
):
    if not user_state or not user_state.get("user_id"):
        sys_msg = "⚠️ প্ৰথমে ওপৰত আপোনাৰ নাম লিখি **Login / লগিন** টিপক।"
        chat_history = chat_history + [[text_input or "", sys_msg]]
        return chat_history, user_state, text_input or ""  # keep the typed question in the box

    user_id = user_state["user_id"]

    final_query_parts = []

    # audio_input not handled in this demo

    ocr_text = ""
    if image_input is not None:
        try:
            if isinstance(image_input, str):
                # gr.Image(type="filepath") hands us a path string
                img = Image.open(image_input)
            elif hasattr(image_input, "name"):
                img = Image.open(image_input.name)
            elif isinstance(image_input, (bytes, bytearray)):
                img = Image.open(io.BytesIO(image_input))
            else:
                img = image_input
        except Exception:
            try:
                img = Image.open(io.BytesIO(image_input.read()))
            except Exception:
                img = None
        if img is not None:
            ocr_text = ocr_from_image(img)
            if ocr_text:
                final_query_parts.append(ocr_text)

    if text_input:
        final_query_parts.append(text_input)

    if not final_query_parts:
        sys_msg = "⚠️ অনুগ্ৰহ কৰি প্ৰশ্ন লিখক, কিম্বা ছবি আপলোড কৰক।"
        chat_history = chat_history + [["", sys_msg]]
        return chat_history, user_state, text_input or ""

    full_query = "\n".join(final_query_parts)

    conv = []
    for u, b in chat_history:
        if u:
            conv.append(("Student", u))
        if b:
            conv.append(("Tutor", b))

    is_math = is_likely_math(full_query)

    if is_math:
        math_answer = solve_math_expression(full_query)
        combined_question = (
            full_query
            + "\n\nগণিত প্ৰোগ্ৰামে এই ফলাফল দিছে:\n"
            + math_answer
            + "\n\nঅনুগ্ৰহ কৰি শ্ৰেণী ১০ ৰ শিক্ষাৰ্থীৰ বাবে সহজ ভাষাত ব্যাখ্যা কৰক।"
        )
        final_answer = llm_answer_with_rag(combined_question, conv)
    else:
        final_answer = llm_answer_with_rag(full_query, conv)

    log_interaction(user_id, full_query, final_answer, is_math)

    display_question = text_input or ocr_text or "(empty)"
    chat_history = chat_history + [[display_question, final_answer]]

    # third return value clears the question textbox (wired to text_inp below,
    # matching the old version's `return chat_history, user_state, ""`)
    return chat_history, user_state, ""


# ---------------------- Gradio UI ----------------------

with gr.Blocks(title=APP_NAME, theme="soft") as demo:
    gr.Markdown(
        """
        # 🧭 জাজাবৰ – SEBA অসমীয়া ক্লাছ ১০ AI Tutor

        - 📘 SEBA ক্লাছ ১০ পাঠ্যপুথিৰ ওপৰত ভিত্তি কৰি উত্তৰ
        - 🗣️ টেক্স্ট + ছবি (OCR) ইনপুট
        - 🧮 গণিত প্ৰশ্নৰ ধাপ-ধাপে সমাধান
        - 👤 ইউজাৰ লগিন + প্ৰগতি (progress) সংৰক্ষণ
        """
    )

    user_state = gr.State({})

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 👤 লগিন")
            username_inp = gr.Textbox(
                label="নাম / ইউজাৰ আইডি",
                placeholder="উদাহৰণ: abu10, student01 ...",
            )
            login_btn = gr.Button("✅ Login / লগিন")
            stats_md = gr.Markdown("এতিয়ালৈকে লগিন হোৱা নাই।", elem_classes="stats-box")

            gr.Markdown(
                """
                ### 💡 টিপছ
                - "ক্লাছ ১০ গণিত: উদাহৰণ ৩.১ প্ৰশ্ন ২" – এই ধৰণৰ প্ৰশ্ন ভাল
                - ফটো আপলোড কৰিলে টেক্স্টটো OCR কৰি পঢ়িব চেষ্টা কৰা হয়
                - সম্ভব হলে প্ৰশ্নটো অসমীয়াত সোধক 🙂
                """
            )

        with gr.Column(scale=3):
            chat = gr.Chatbot(label="জাজাবৰ সৈতে কথোপকথন", height=500)

            text_inp = gr.Textbox(
                label="আপোনাৰ প্ৰশ্ন লিখক",
                placeholder="উদাহৰণ: \"ক্লাছ ১০ অসমীয়া: অনুচ্ছেদ পাঠ ১ ৰ মূল বিষয় কি?\"",
                lines=2,
            )

            with gr.Row():
                # gradio 4.x gr.Image only accepts "numpy", "pil", or "filepath";
                # the original type="file" raises an error on gradio==4.44.0
                image_inp = gr.Image(label="📷 প্ৰশ্নৰ ছবি (Optional)", type="filepath")
                audio_inp = gr.Audio(label="🎙️ কণ্ঠস্বৰ প্ৰশ্ন (Stub — not used now)", type="numpy")

            with gr.Row():
                ask_btn = gr.Button("🤖 জাজাবৰক সোধক")

    login_btn.click(login_user, inputs=[username_inp, user_state], outputs=[user_state, stats_md])

    def wrapped_chat(text, image, audio, history, user_state_inner, username_inner):
        if user_state_inner and username_inner and not user_state_inner.get("username"):
            user_state_inner["username"] = username_inner
        return chat_logic(username_inner, text, image, audio, history, user_state_inner)

    ask_btn.click(
        wrapped_chat,
        inputs=[text_inp, image_inp, audio_inp, chat, user_state, username_inp],
        outputs=[chat, user_state, text_inp],  # third slot clears/restores the question box
    )

# gradio 4.x renamed queue(concurrency_count=...) to default_concurrency_limit,
# so the old keyword fails on gradio==4.44.0; Spaces serves Gradio's default
# port 7860, so no explicit server_port is needed.
demo.queue(default_concurrency_limit=4).launch(server_name="0.0.0.0")