import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
import os
import json
import re
import threading
import time
import uuid
from datetime import datetime
import nltk
from transformers import pipeline
from huggingface_hub import HfApi, HfFolder
import requests

# --- NLTK Setup ---
# sent_tokenize needs Punkt data.  Depending on the installed NLTK release the
# tokenizer reads either "punkt" or "punkt_tab", so each resource is checked
# on its own: a machine that already has "punkt" cached must still fetch
# "punkt_tab" (the original only downloaded both when "punkt" was missing).
for _nltk_resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_nltk_resource}")
    except LookupError:
        nltk.download(_nltk_resource)

# --- Logging Setup ---
# Location of the append-only application log; the directory is created at
# import time so the logging helpers can open the file unconditionally.
log_dir = "./logs"
log_file_path = os.path.join(log_dir, "Second_Opinion_Logs.log")
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)


def upload_logs_to_hf():
    """Upload the local log file to the Hugging Face Space repository.

    This is best-effort: when no token is available the function returns
    silently, and any failure during the upload is printed rather than raised
    so a caller running in a background thread keeps going.

    The token is taken from the cached Hugging Face login first, then from the
    ``HF_TOKEN`` or ``HUGGINGFACE_HUB_TOKEN`` environment variables (the same
    two variables ``call_llama`` accepts).
    """
    try:
        token = (
            HfFolder.get_token()
            or os.getenv("HF_TOKEN")
            or os.getenv("HUGGINGFACE_HUB_TOKEN")
        )
        if not token:
            return
        HfApi().upload_file(
            path_or_fileobj=log_file_path,
            path_in_repo="logs/Second_Opinion_Logs.log",
            repo_id="singhn9/privateSOWN",
            repo_type="space",
            token=token,
        )
    except Exception as e:
        # Deliberately broad: log upload must never take the app down.
        print(f"Log upload failed: {e}")

def background_log_uploader(interval=300):
    """Loop forever, uploading the log file every ``interval`` seconds.

    Each cycle sleeps first and then uploads only if the log file exists.
    The function never returns, so it is meant to be a thread target.
    """
    while True:
        time.sleep(interval)
        if not os.path.exists(log_file_path):
            continue
        upload_logs_to_hf()

# Opt-in: the uploader thread is started only when ENABLE_LOG_UPLOAD is
# exactly "1".  It is a daemon thread, so it does not block interpreter exit.
if os.environ.get("ENABLE_LOG_UPLOAD") == "1":
    threading.Thread(
        target=background_log_uploader,
        daemon=True,
    ).start()


def log_action(action, request: gr.Request = None):
    """Append one timestamped line describing ``action`` to the log file.

    Args:
        action: Text describing what happened.
        request: Optional Gradio request; its client host is recorded as the
            IP.  When the request (or its client info) is missing, the entry
            says "Unknown IP".
    """
    from datetime import timezone

    client = getattr(request, "client", None) if request else None
    user_ip = client.host if client else "Unknown IP"
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted text is unchanged.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} (GMT) - IP: {user_ip} - {action}\n"
    # Explicit UTF-8 so the write does not depend on the platform's default
    # encoding.
    with open(log_file_path, "a", encoding="utf-8") as f:
        f.write(log_entry)

def log_input_text(resume_text, job_list, request: gr.Request = None):
    """Append the raw resume and job-description inputs to the log file.

    Args:
        resume_text: Resume text as entered by the user.
        job_list: Job descriptions; a list is joined with ``---`` separator
            lines, anything else is converted with ``str()``.
        request: Optional Gradio request; its client host is recorded as the
            IP.  When the request (or its client info) is missing, the entry
            says "Unknown IP".
    """
    from datetime import timezone

    client = getattr(request, "client", None) if request else None
    user_ip = client.host if client else "Unknown IP"
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted text is unchanged.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    jobs_str = "\n---\n".join(job_list) if isinstance(job_list, list) else str(job_list)
    log_entry = (
        f"{timestamp} (GMT) - IP: {user_ip}\n"
        f"--- Resume Input ---\n{resume_text}\n\n"
        f"--- Job Descriptions Input ---\n{jobs_str}\n"
        "---------------------------------------------\n"
    )
    # User-pasted text routinely contains non-ASCII characters; write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(log_file_path, "a", encoding="utf-8") as f:
        f.write(log_entry)

# --- ASR & Models ---
# Module-level cache for the speech-recognition pipeline (built on first use).
asr = None


def get_asr():
    """Return the shared ASR pipeline, constructing it on the first call.

    The pipeline uses the ``openai/whisper-tiny.en`` model with 30-second
    chunks and ``device=-1``; once built it is stored in the module-level
    ``asr`` variable and reused by later calls.
    """
    global asr
    if asr is not None:
        return asr

    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        chunk_length_s=30,
        device=-1,
    )
    return asr


# Load embedding models
# Every embedding model is instantiated eagerly at import time and keyed by
# its model name.  Insertion order matters: the UI builds "Explain" buttons
# for the first three entries.
models = {
    name: SentenceTransformer(name)
    for name in (
        "all-MiniLM-L6-v2",
        "paraphrase-MiniLM-L6-v2",
        "multi-qa-MiniLM-L6-cos-v1",
        "all-mpnet-base-v2",
        "paraphrase-mpnet-base-v2",
        "all-distilroberta-v1",
        "paraphrase-albert-small-v2",
        "multi-qa-distilbert-cos-v1",
        "distiluse-base-multilingual-cased-v2",
        "all-MiniLM-L12-v2",
    )
}

# --- LLM Integration ---
# Module-level defaults for the chat-completions endpoint.  The token is read
# once, at import time, from HUGGINGFACE_HUB_TOKEN.
HF_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")
API_URL = "https://router.huggingface.co/v1/chat/completions"
def llm_error(message):
    """Build the standard error payload: no questions, status "error"."""
    return dict(questions=[], status="error", message=message)

def call_llama(prompt, max_tokens=3000):
    """Send ``prompt`` to the chat-completions endpoint and return the reply.

    Args:
        prompt: User message content.
        max_tokens: Upper bound on generated tokens forwarded to the API.

    Returns:
        The stripped assistant message text on success.  On any failure
        (missing token, non-200 response, malformed or empty response,
        network/JSON error) the ``llm_error(...)`` dict is returned instead,
        so callers must check for a dict with ``status == "error"``.
    """
    # Looked up per call so a token set after import is still honoured.
    token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
    if not token:
        return llm_error("HF token missing")

    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "meta-llama/Meta-Llama-3-8B-Instruct",
        "messages": [
            {"role": "system", "content": "You are a senior interviewer generating structured interview questions."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3,
        "max_tokens": max_tokens,
        "stream": False
    }

    try:
        resp = requests.post(API_URL, headers=headers, json=payload, timeout=90)

        if resp.status_code != 200:
            return llm_error(f"HF Router error {resp.status_code}: {resp.text}")

        data = resp.json()

        # Walk the response defensively: an empty "choices" list or a null
        # "content" should surface as "Empty LLM response", not as an
        # IndexError/AttributeError message leaking out of the except below.
        choices = data.get("choices") if isinstance(data, dict) else None
        first_choice = choices[0] if isinstance(choices, list) and choices else None
        message = first_choice.get("message") if isinstance(first_choice, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        msg = content.strip() if isinstance(content, str) else ""

        if not msg:
            return llm_error("Empty LLM response")

        return msg

    except Exception as e:
        # Boundary handler: network and JSON-decoding failures become an
        # error payload rather than an exception in the UI callback.
        return llm_error(str(e))


def copy_snapshot(notes, transcript):
    """Render the interviewer notes and transcript as a Markdown snapshot."""
    lines = [
        "",
        "### 📋 Interview Snapshot",
        "",
        "**Notes**",
        f"{notes}",
        "",
        "**Transcript**",
        f"{transcript}",
        "",
    ]
    return "\n".join(lines)


def build_question_prompt(resume, jd):
    """Compose the question-generation prompt for the LLM.

    Only the first 3000 characters of ``resume`` and of ``jd`` are embedded;
    the rest of the prompt is fixed instruction text and the JSON schema.
    """
    resume_excerpt = resume[:3000]
    jd_excerpt = jd[:3000]
    return f"""
You are a senior interviewer.

STRICT RULES:
- Output VALID JSON only
- No markdown
- No explanations
- Every field MUST be present
- If unsure, use empty strings ""
- If JSON would be invalid, return exactly: {{ "questions": [] }}

Schema:
{{
  "questions": [
    {{
      "id": "Q1",
      "type": "Technical",
      "question": "string",
      "strong_answer": ""
    }}
  ]
}}

Resume:
\"\"\"{resume_excerpt}\"\"\"

Job Description:
\"\"\"{jd_excerpt}\"\"\"
"""


def gradio_generate_questions(resume, jd_text, jd_list, job_idx):
    """Generate interview questions for a resume / job-description pair.

    The job description is chosen in priority order: the current text-box
    content (``jd_text``) when non-blank, otherwise ``jd_list[job_idx]`` when
    that index is valid, otherwise a "No JD provided." placeholder.

    Returns:
        ``{"questions": [...], "status": "ok"}`` on success, or a dict with
        ``"questions": []``, ``"status": "error"`` and a ``"message"`` when
        the LLM call fails or its output is not usable JSON.
    """
    print(" gradio_generate_questions CALLED")

    if not resume:
        resume = "No resume provided."

    if jd_text and jd_text.strip():
        active_jd = jd_text
    elif jd_list and isinstance(job_idx, int) and 0 <= job_idx < len(jd_list):
        active_jd = jd_list[job_idx]
        # This message used to be printed in the fallback branch below, where
        # no job from the list was actually being used.
        print("Using job from list for generation.")
    else:
        active_jd = "No JD provided."

    if not active_jd.strip():
        active_jd = "No JD provided."

    try:
        raw = call_llama(build_question_prompt(resume, active_jd))
    except Exception as e:
        print(f"Gen Q Error: {e}")
        return {
            "questions": [],
            "status": "error",
            "message": str(e)
        }

    # call_llama signals failure by returning an error dict instead of text.
    if isinstance(raw, dict) and raw.get("status") == "error":
        return raw

    invalid_json = {
        "questions": [],
        "status": "error",
        "message": "LLM returned truncated or invalid JSON. Please retry."
    }

    try:
        parsed = safe_extract_json(raw)
        questions = parsed.get("questions", [])
    except Exception as e:
        print("JSON parse failure:", e)
        return invalid_json

    # A non-list "questions" value cannot be rendered as choices downstream.
    if not isinstance(questions, list):
        print("JSON parse failure: 'questions' is not a list")
        return invalid_json

    return {
        "questions": questions,
        "status": "ok"
    }
    

# --- Similarity Computation ---
def compute_similarity(resume_text, job_list, request: gr.Request = None):
    """Score a resume against each job description with every loaded model.

    For each model, the resume and all jobs are embedded together and the
    cosine similarity between the resume and each job is recorded.  Summary
    rows (average, variance, median, standard deviation and a certainty
    score derived from the variance) are computed per job across models.

    Args:
        resume_text: Resume text; must be non-blank.
        job_list: Job description strings; blank entries are ignored.
        request: Optional Gradio request, forwarded to ``log_action``.

    Returns:
        ``(recommendation_html, table_html)``.  On validation or computation
        errors the first element is an error message and the second is None.
    """
    try:
        log_action("Compute Similarity Triggered", request)
        if not resume_text or not resume_text.strip():
            return "<b>Error:</b> Resume cannot be empty.", None

        # Drop blank entries
        job_list = [j for j in (job_list or []) if j.strip()]
        if not job_list:
            return "<b>Error:</b> Please add at least one job description.", None

        # The document list is the same for every model, so build it once.
        documents = [resume_text] + job_list
        results = {}
        for model_name, model in models.items():
            embeddings = model.encode(documents)
            resume_embedding = embeddings[0]
            job_embeddings = embeddings[1:]
            results[model_name] = cosine_similarity([resume_embedding], job_embeddings).flatten()

        job_labels = [f"Job {i}" for i in range(1, len(job_list) + 1)]
        # Rows are models, columns are jobs.
        df = pd.DataFrame(results, index=job_labels).T

        # All metrics are computed from the model rows before any metric row
        # is appended to the frame.
        variance = df.var(axis=0)
        metrics = {
            "Average": df.mean(axis=0),
            "Variance": variance,
            "Median": df.median(axis=0),
            "Standard Deviation": df.std(axis=0),
            # Epsilon avoids division by zero when all variances are 0.
            "Certainty Score": 1 - (variance / (variance.max() + 1e-9)),
        }
        for metric_name, values in metrics.items():
            df.loc[metric_name] = values

        # Split on the actual number of metric rows rather than a literal 5,
        # so adding or removing a metric cannot desynchronise the two tables.
        n_metrics = len(metrics)
        model_rows = df.iloc[:-n_metrics]
        metrics_rows = df.iloc[-n_metrics:]

        styled_df = model_rows.style.background_gradient(cmap="Greens", axis=None).to_html()
        styled_df += metrics_rows.to_html(header=False)

        best_job = metrics["Average"].idxmax()
        reasoning = f"<b>The best job match is {best_job} based on the highest average similarity score.</b>"

        description = """<p><b>Explanation:</b> Higher 'Certainty Score' means models agree more.</p>"""
        return reasoning, styled_df + description

    except Exception as e:
        # UI boundary: report the failure in the page instead of raising.
        return f"<b>Error during computation:</b> {str(e)}", None

def explain_scores_by_sentences(model, resume_text, job_text, top_k=3):
    """Return HTML listing the ``top_k`` most similar resume/job sentence pairs.

    Both texts are split into sentences, each sentence is embedded with
    ``model``, and all resume-by-job cosine similarities are ranked from
    highest to lowest.  If either text yields no sentences, a short
    "No sentences found." message is returned instead.
    """
    from nltk.tokenize import sent_tokenize

    resume_sents = sent_tokenize(resume_text)
    job_sents = sent_tokenize(job_text)
    if not (resume_sents and job_sents):
        return "<b>No sentences found.</b>"

    resume_embeddings = model.encode(resume_sents)
    job_embeddings = model.encode(job_sents)
    sim_matrix = cosine_similarity(resume_embeddings, job_embeddings)

    # Rank positions of the flattened (resume x job) matrix, best first.
    ranked = np.argsort(sim_matrix.flatten())[::-1][:top_k]
    row_width = job_embeddings.shape[0]

    pieces = ["<h4>Top Similar Sentence Pairs</h4>"]
    for rank, flat_idx in enumerate(ranked, start=1):
        # Convert the flat position back to (resume sentence, job sentence).
        row, col = divmod(flat_idx, row_width)
        score = sim_matrix[row, col]
        pieces.append(
            f"<p><b>#{rank} (Score {score:.4f}):</b><br><u>Resume:</u> {resume_sents[row]}<br><u>Job:</u> {job_sents[col]}</p>"
        )
    return "".join(pieces)

def set_active_question(label):
    """Return the text before the first ":" of ``label``, or "General" if empty."""
    if label:
        question_id, _, _ = label.partition(":")
        return question_id
    return "General"

def enable_audio(question):
    """Return an update that makes the target interactive only if ``question`` is truthy."""
    return gr.update(interactive=bool(question))

def safe_extract_json(text: str):
    """Extract and parse the first JSON object found in LLM output.

    Parsing starts at the first ``{`` and stops at the end of that JSON
    value, so leading prose and trailing content (a closing markdown fence,
    an explanation after the object) are ignored instead of breaking the
    parse.

    Args:
        text: Raw model output.

    Returns:
        The parsed JSON object.

    Raises:
        ValueError: If ``text`` is not a string, contains no ``{``, or the
            object starting there is truncated or otherwise invalid
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not isinstance(text, str):
        raise ValueError("No JSON object found")

    start = text.find("{")
    if start == -1:
        raise ValueError("No JSON object found")

    try:
        # raw_decode parses exactly one JSON value beginning at `start` and
        # tolerates whatever follows it.
        parsed, _end = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        # Keep the specific diagnostic for output cut off right after a key.
        if text.rstrip().endswith(":"):
            raise ValueError("JSON truncated at key") from None
        raise

    return parsed


def explain_model_scores(model_name, resume, job_list, selected_job_idx):
    """Build the sentence-level explanation for one model and one job.

    Always returns an update that makes the target visible, carrying either
    the explanation HTML or an error message (no jobs, index out of range,
    or any exception raised along the way).
    """
    def show(html_text):
        return gr.update(value=html_text, visible=True)

    try:
        model = models[model_name]
        if not job_list:
            return show("<b>Error:</b> No jobs added.")

        idx = int(selected_job_idx)
        if not 0 <= idx < len(job_list):
            return show("<b>Error:</b> Invalid job index.")

        return show(explain_scores_by_sentences(model, resume, job_list[idx]))
    except Exception as e:
        return show(f"<b>Error:</b> {str(e)}")

# --- Interview Logic ---
def transcribe_chunk(audio, current_transcript, active_question, epoch):
    """Transcribe one streamed audio chunk and append it to the transcript.

    Args:
        audio: ``(sample_rate, samples)`` pair, or None.
        current_transcript: Transcript accumulated so far.
        active_question: Label placed in brackets before the new text; no
            prefix is added when it is empty.
        epoch: Accepted for the event wiring; not read here.

    Returns:
        The transcript twice, ``(text, text)``.  It is unchanged when the
        chunk is missing, shorter than 8000 samples, silent, produces no
        text, or produces text already contained in the transcript.
    """
    unchanged = (current_transcript, current_transcript)

    if audio is None or audio[1] is None or len(audio[1]) < 8000:
        return unchanged

    sample_rate, samples = audio
    samples = samples.astype(np.float32)

    # Silence guard
    # NOTE(review): the 0.01 threshold is applied to the raw sample values
    # before normalisation - confirm it suits the sample scale Gradio sends.
    peak = np.max(np.abs(samples))
    if peak < 0.01:
        return unchanged

    # Peak-normalise.
    samples /= peak

    # The ASR pipeline is given a file path, so the chunk goes through a
    # uniquely named temporary WAV file that is removed afterwards.
    wav_path = f"temp_{uuid.uuid4().hex}.wav"
    import scipy.io.wavfile as wavfile
    wavfile.write(wav_path, sample_rate, samples)

    try:
        text = get_asr()(wav_path).get("text", "").strip()

        # Duplicate guard: skip empty output and text already in the transcript.
        if not text or text in current_transcript:
            return unchanged

        segment = f"[{active_question}] {text}" if active_question else f"{text}"
        if current_transcript:
            merged = current_transcript + "\n" + segment
        else:
            merged = segment
        return merged, merged

    finally:
        if os.path.exists(wav_path):
            os.remove(wav_path)


# --- UI ---
# Invite code required by check_invite.  It can be overridden with the
# INVITE_CODE environment variable so the secret does not have to live in
# source control; the previous hard-coded value remains the default.
INVITE_CODE = os.getenv("INVITE_CODE", "INDIAMBA")

def add_job_to_list(current_job, job_list):
    """Append the entered job description and refresh the dependent widgets.

    Args:
        current_job: Text from the job description box.
        job_list: Job descriptions added so far (not mutated).

    Returns:
        A 4-tuple matching the wired outputs: the new job list, the HTML for
        the job display area, an empty string to clear the input box, and an
        update for the job selector dropdown.  When ``current_job`` is blank
        the list is returned as-is and the display/dropdown are left
        untouched.
    """
    import html

    if not current_job or not current_job.strip():
        # Always four values (the click handler has four outputs); no-op
        # updates keep the display area and dropdown as they are.
        return job_list, gr.update(), "", gr.update()

    updated = job_list + [current_job]
    # Escape the preview: it is user-supplied text placed inside HTML.
    items = "".join(
        f"<li>Job {i}: {html.escape(job[:50])}...</li>"
        for i, job in enumerate(updated, start=1)
    )
    display = f"<ul>{items}</ul>"
    choices = [f"Job {i}" for i in range(1, len(updated) + 1)]
    return updated, display, "", gr.update(choices=choices, value=choices[-1])

def clear_jobs():
    """Reset the job list, its display, the input box and the job selector.

    The dropdown's value is cleared along with its choices so it does not
    keep showing a job that no longer exists.
    """
    return [], "<i>No jobs...</i>", "", gr.update(choices=[], value=None)


def check_invite(user_code):
    """Return visibility updates for two targets based on the invite code.

    A matching code (after stripping whitespace) hides the first target and
    shows the second; any other code does the opposite.
    """
    granted = user_code.strip() == INVITE_CODE
    return gr.update(visible=not granted), gr.update(visible=granted)

def generate_questions_ui(resume, jd_text, jd_list, job_idx):
    """Generate questions and translate the result into UI updates.

    Returns:
        Exactly three values on every path: an update for the question
        choices, an update for the status message, and an empty string that
        resets the transcript.
    """
    result = gradio_generate_questions(resume, jd_text, jd_list, job_idx)

    def failure(message):
        return (
            gr.update(choices=[], value=None),
            gr.update(value=f"❌ {message}", visible=True),
            ""
        )

    if result.get("status") == "error":
        return failure(result.get("message"))

    # The LLM output is not guaranteed to follow the schema: skip items that
    # are not objects or have no question text, and synthesise an id when it
    # is missing, instead of raising KeyError/TypeError in the callback.
    labels = []
    for position, q in enumerate(result.get("questions") or [], start=1):
        if not isinstance(q, dict):
            continue
        question_text = str(q.get("question") or "").strip()
        if not question_text:
            continue
        question_id = str(q.get("id") or f"Q{position}").strip()
        labels.append(f"{question_id}: {question_text}")

    if not labels:
        # An empty list is not a success from the user's point of view.
        return failure("No questions were generated. Please retry.")

    return (
        # value=None drops a selection left over from a previous question set.
        gr.update(choices=labels, value=None),
        gr.update(value="✅ Questions generated", visible=True),
        ""
    )


# Custom CSS handed to gr.Blocks. It styles the "job-display" class (border,
# padding, light background, rounded corners), which is applied to the HTML
# area that lists the added jobs.
custom_css = """
.job-display { border: 1px solid #ddd; padding: 10px; background: #f4f4f4; border-radius: 5px; }
"""

with gr.Blocks(css=custom_css) as app:

    # State
    transcript_state = gr.State("")       # transcript text accumulated so far
    active_question_state = gr.State("")  # id of the question being answered
    job_list_state = gr.State([])         # job descriptions added by the user
    audio_epoch = gr.State(0)             # incremented whenever the question changes
    selected_job_index = gr.State(0)      # zero-based index into job_list_state

    # --- Login UI (Top Level) ---
    with gr.Column(visible=True) as login_ui:
        gr.Markdown("# 🔐 Second Opinion with Naval")
        code_input = gr.Textbox(label="Invite Code", type="password")
        access_button = gr.Button("Enter")

    # --- Main UI (Top Level, Hidden) ---
    with gr.Column(visible=False) as main_ui:
        with gr.Tabs():

            # TAB 1: Matcher
            with gr.Tab("📄 Resume Matcher"):
                gr.Markdown("### ✍️ Input")
                with gr.Row():
                    resume_input = gr.Textbox(label="Paste Resume", lines=10)
                    with gr.Column():
                        single_job_input = gr.Textbox(label="Job Description", lines=5)
                        with gr.Row():
                            add_job_btn = gr.Button("➕ Add Job")
                            clear_jobs_btn = gr.Button("🗑️ Clear")
                        job_display_area = gr.HTML("<i>No jobs added yet...</i>", elem_classes="job-display")

                match_button = gr.Button("Match", variant="primary")
                recommendation_output = gr.HTML()
                table_output = gr.HTML()

                nerd_button = gr.Button("Papa Please Preach More (Details)", visible=False)
                explanation_output = gr.HTML(visible=False)

                with gr.Row(visible=False) as explain_controls:
                    job_index_to_explain = gr.Number(label="Job Index (0=Job 1)", value=0, precision=0)
                    # One "Explain" button for each of the first three models.
                    for m in list(models.keys())[:3]:
                        btn = gr.Button(f"Explain {m}")
                        btn.click(explain_model_scores, [gr.State(m), resume_input, job_list_state, job_index_to_explain], explanation_output)

                nerd_button.click(lambda: (gr.update(visible=True), gr.update(visible=True)), None, [table_output, explain_controls])

            # TAB 2: Interviewer
            with gr.Tab("🎙️ Live Interview"):

                gr.Markdown("### 📌 Interview Questions")
                job_selector = gr.Dropdown(
                    label="Select Job Description",
                    choices=[],
                    interactive=True
                )
                selected_jd_preview = gr.Markdown()

                # The preview reads selected_job_index, so it is chained with
                # .then() to run only after the index has been written.  Two
                # independent .change listeners gave no such ordering and the
                # preview could be rendered from the previous index.
                job_selector.change(
                    lambda x: int(x.split()[-1]) - 1 if x else 0,
                    inputs=job_selector,
                    outputs=selected_job_index
                ).then(
                    lambda i, jds: jds[i] if jds and 0 <= i < len(jds) else "",
                    [selected_job_index, job_list_state],
                    selected_jd_preview
                )

                gen_questions_btn = gr.Button("Generate Questions")
                status_msg = gr.Markdown(visible=False)

                questions_radio = gr.Radio(
                    label="Select Question",
                    choices=[],
                    interactive=True
                )

                gr.Markdown("### 📝 Live Transcript")
                transcript_box = gr.Textbox(
                    lines=8,
                    interactive=False,
                    placeholder="Waiting for speech..."
                )

                gr.Markdown("### 🧠 Interviewer Notes")
                notes_box = gr.Textbox(lines=3)

                copy_btn = gr.Button("Copy Interview Snapshot")
                copy_status = gr.Markdown(visible=False)
                # copy_status starts hidden; returning only the text would
                # leave it hidden, so the update also makes it visible.
                copy_btn.click(
                    lambda notes, transcript: gr.update(
                        value=copy_snapshot(notes, transcript),
                        visible=True
                    ),
                    inputs=[notes_box, transcript_box],
                    outputs=[copy_status]
                )

                audio_bridge = gr.Audio(
                    sources=["microphone"],
                    type="numpy",
                    streaming=True,
                    interactive=False   # start disabled
                )

                audio_bridge.stream(
                    transcribe_chunk,
                    [audio_bridge, transcript_state, active_question_state, audio_epoch],
                    [transcript_box, transcript_state]
                )

                # --- Wire interview events ---
                # Generating questions resets transcript_state; the visible
                # transcript box is cleared as well so the two stay in sync.
                gen_questions_btn.click(
                    generate_questions_ui,
                    inputs=[resume_input, single_job_input, job_list_state, selected_job_index],
                    outputs=[questions_radio, status_msg, transcript_state]
                ).then(
                    lambda: "", None, transcript_box
                )

                # Selecting a question records its id, resets the transcript
                # and bumps the audio epoch, then clears the transcript box.
                questions_radio.change(
                    lambda q, e: (set_active_question(q), "", e + 1),
                    inputs=[questions_radio, audio_epoch],
                    outputs=[active_question_state, transcript_state, audio_epoch]
                ).then(
                    lambda: "", None, transcript_box
                )

                # The microphone is usable only while a question is selected.
                questions_radio.change(
                    enable_audio,
                    inputs=questions_radio,
                    outputs=audio_bridge
                )

    # --- Wire matcher and login events ---
    add_job_btn.click(
        add_job_to_list,
        [single_job_input, job_list_state],
        [job_list_state, job_display_area, single_job_input, job_selector]
    )

    clear_jobs_btn.click(
        clear_jobs,
        [],
        [job_list_state, job_display_area, single_job_input, job_selector]
    )

    # After a match has been computed, reveal the "details" button.
    match_button.click(
        compute_similarity,
        [resume_input, job_list_state],
        [recommendation_output, table_output]
    ).then(lambda: gr.update(visible=True), None, nerd_button)

    access_button.click(check_invite, [code_input], [login_ui, main_ui])
# Build the ASR pipeline before serving so the first audio chunk does not pay
# the model-loading cost.  A warmup failure is reported but does not stop the
# app from launching.
try:
    if asr is None:
        get_asr()
except Exception as warmup_error:
    print("ASR warmup failed:", warmup_error)

app.queue().launch()