Spaces:

singhn9
/

privateSOWN

Running

File size: 18,491 Bytes

import html
import os
import socket
import threading
import time
from datetime import datetime, timezone

import gradio as gr
import nltk
import numpy as np
import pandas as pd
from huggingface_hub import HfApi, HfFolder
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Fetch the NLTK resources used for sentence tokenization
for _nltk_resource in ('punkt', "punkt_tab"):
    nltk.download(_nltk_resource)

###############################
#  LOGGING SETUP
###############################
# Directory and file that all log entries are appended to. The directory is
# created at import time when it does not exist yet.
log_dir = "./logs"
log_file_path = os.path.join(log_dir, "Second_Opinion_Logs.log")
os.makedirs(log_dir, exist_ok=True)

def upload_logs_to_hf():
    """Push the local log file to the ``singhn9/privateSOWN`` Space repo.

    The token comes from ``HfFolder.get_token()`` or, failing that, from the
    ``HUGGINGFACE_HUB_TOKEN`` environment variable. The upload is best effort:
    a missing token or any exception is reported with ``print`` and never
    propagated to the caller.
    """
    try:
        hub = HfApi()
        hf_token = HfFolder.get_token() or os.getenv("HUGGINGFACE_HUB_TOKEN")
        if hf_token:
            hub.upload_file(
                path_or_fileobj=log_file_path,
                path_in_repo="logs/Second_Opinion_Logs.log",
                repo_id="singhn9/privateSOWN",
                repo_type="space",
                token=hf_token,
            )
            print("Logs uploaded to Hugging Face repo.")
        else:
            print(" No HF token found — skipping upload.")
    except Exception as err:
        print(f" Log upload failed: {err}")

def background_log_uploader(interval=300):
    """Loop forever, attempting a log upload after each ``interval``-second sleep.

    Args:
        interval: Seconds to sleep before every upload attempt.

    The function never returns. An iteration is skipped when the log file
    does not exist yet.
    """
    while True:
        time.sleep(interval)
        if not os.path.exists(log_file_path):
            continue
        upload_logs_to_hf()

# Run the periodic uploader on a daemon thread so it never blocks interpreter exit.
_log_uploader_thread = threading.Thread(
    target=background_log_uploader,
    daemon=True,
)
_log_uploader_thread.start()

def log_action(action, request=None):
    """Append a single timestamped line describing ``action`` to the log file.

    Args:
        action: Text describing what happened.
        request: Optional request object. When it has a ``client`` attribute,
            ``request.client.host`` is logged as the IP; otherwise the IP that
            this machine's hostname resolves to is used.

    Logging is best effort: a failed IP lookup is recorded as "Unknown IP",
    and a failed write is reported with ``print`` instead of being raised.
    """
    try:
        if request and hasattr(request, 'client'):
            user_ip = request.client.host
        else:
            user_ip = socket.gethostbyname(socket.gethostname())
    except Exception:
        user_ip = "Unknown IP"

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted text is unchanged.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} (GMT) - IP: {user_ip} - {action}\n"

    try:
        # Explicit UTF-8 so non-ASCII text is written the same way regardless
        # of the platform's default locale encoding.
        with open(log_file_path, 'a', encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"Log entry added: {log_entry.strip()}")
    except Exception as e:
        print(f"Error logging action: {e}")

def log_input_text(resume_text, job_list, user_ip="Unknown IP"):
    """Append the full resume and job-description input to the log file.

    Args:
        resume_text: The resume text as entered by the user.
        job_list: Job descriptions. A list is joined with a ``---`` separator
            line; any other value is converted with ``str``.
        user_ip: IP address to record alongside the input.

    Logging is best effort: any failure is reported with ``print`` and not
    raised.
    """
    try:
        # Timezone-aware replacement for the deprecated datetime.utcnow(); the
        # formatted text is unchanged.
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
        # specific handling for list or string
        jobs_str = "\n---\n".join(job_list) if isinstance(job_list, list) else str(job_list)

        log_entry = (
            f"{timestamp} (GMT) - IP: {user_ip}\n"
            f"--- Resume Input ---\n{resume_text}\n\n"
            f"--- Job Descriptions Input ---\n{jobs_str}\n"
            "---------------------------------------------\n"
        )
        # Pasted resumes routinely contain non-ASCII characters; explicit UTF-8
        # keeps the write from failing on a non-UTF-8 default locale.
        with open(log_file_path, 'a', encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"✅ Logged full user input at {timestamp} from {user_ip}")
    except Exception as e:
        print(f"Error logging input: {e}")

###############################
#  LOAD MODELS
###############################
# Checkpoint names of the sentence-embedding models that are compared.
_MODEL_NAMES = (
    "all-MiniLM-L6-v2",
    "paraphrase-MiniLM-L6-v2",
    "multi-qa-MiniLM-L6-cos-v1",
    "all-mpnet-base-v2",
    "paraphrase-mpnet-base-v2",
    "all-distilroberta-v1",
    "paraphrase-albert-small-v2",
    "multi-qa-distilbert-cos-v1",
    "distiluse-base-multilingual-cased-v2",
    "all-MiniLM-L12-v2",
)

# Every model is instantiated at import time and keyed by its checkpoint name.
models = {name: SentenceTransformer(name) for name in _MODEL_NAMES}

###############################
#  MAIN SIMILARITY FUNCTION
###############################
def compute_similarity(resume_text, job_list):
    """Score a resume against each job description with every loaded model.

    Each model in ``models`` encodes the resume together with the non-empty
    job descriptions, and the cosine similarity between the resume and every
    job is collected into a table (one row per model, one column per job).
    Summary rows (average, variance, median, standard deviation and a
    certainty score) are appended below the model rows.

    Args:
        resume_text: The resume as a single string.
        job_list: List of job-description strings; blank entries are ignored.

    Returns:
        A ``(reasoning_html, table_html)`` tuple on success. On invalid input
        or any exception the tuple is ``(error_html, None)``.
    """
    try:
        if not resume_text.strip():
            return "<b>Error:</b> Resume cannot be empty.", None

        # Filter out empty strings from the list
        job_list = [j for j in job_list if j.strip()]

        if not job_list:
            return "<b>Error:</b> Please add at least one job description.", None

        # The documents are identical for every model, so build them once.
        documents = [resume_text] + job_list

        results = {}
        for model_name, model in models.items():
            embeddings = model.encode(documents)
            # Row 0 is the resume; the remaining rows are the jobs, in order.
            resume_embedding = embeddings[0]
            job_embeddings = embeddings[1:]
            results[model_name] = cosine_similarity([resume_embedding], job_embeddings).flatten()

        job_labels = [f"Job {i}" for i in range(1, len(job_list) + 1)]
        df = pd.DataFrame(results, index=job_labels).T

        variance = df.var(axis=0)
        max_variance = variance.max()
        if max_variance > 0:
            certainty = 1 - (variance / max_variance)
        else:
            # No usable spread between models: dividing would yield NaN, so
            # report full consensus instead.
            certainty = pd.Series(1.0, index=variance.index)

        # All metrics are computed from the model rows before any of them is
        # appended, so a summary row never feeds into another summary row.
        metrics = {
            "Average": df.mean(axis=0),
            "Variance": variance,
            "Median": df.median(axis=0),
            "Standard Deviation": df.std(axis=0),
            "Certainty Score": certainty,
        }
        for metric_name, values in metrics.items():
            df.loc[metric_name] = values

        # Split on the actual number of summary rows rather than a literal count.
        metric_count = len(metrics)
        model_rows = df.iloc[:-metric_count]
        metrics_rows = df.iloc[-metric_count:]

        # Only the model rows get the colour gradient; summary rows are plain.
        styled_df = model_rows.style.background_gradient(cmap="Greens", axis=None).to_html()
        styled_df += metrics_rows.to_html(header=False)

        best_job = metrics["Average"].idxmax()
        reasoning = f"<b>The best job match is {best_job} based on the highest average similarity score.</b>"

        # --- RESTORED FULL DESCRIPTION AND REFERENCES ---
        description = """
        <p><b>Explanation of the Table:</b></p>
        <ul>
            <li><b>Models:</b> Each row corresponds to a pre-trained model used for computing similarity. Below are details about each model:</li>
            <ul>
                <li><b>all-MiniLM-L6-v2:</b> Trained on NLI and STS datasets. Developed by Hugging Face and Microsoft. 
                    (<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>, 
                    <a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
                <li><b>paraphrase-MiniLM-L6-v2:</b> Optimized for paraphrase detection on datasets like Quora Questions and MSRPC. 
                    (<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>, 
                    <a href="https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
                <li><b>multi-qa-MiniLM-L6-cos-v1:</b> Fine-tuned for question-answering tasks using datasets like SQuAD and Natural Questions. 
                    (<a href="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1" target="_blank">Model Card</a>).</li>
                <li><b>all-mpnet-base-v2:</b> Robust embeddings for high-contextualized tasks. 
                    (<a href="https://arxiv.org/abs/2004.09297" target="_blank">Research Paper</a>, 
                    <a href="https://huggingface.co/sentence-transformers/all-mpnet-base-v2" target="_blank">Model Card</a>).</li>
                <li><b>paraphrase-mpnet-base-v2:</b> Reliable for paraphrase tasks, trained on diverse datasets. 
                    (<a href="https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2" target="_blank">Model Card</a>).</li>
                <li><b>all-distilroberta-v1:</b> A lightweight RoBERTa-based model for sentence embeddings. 
                    (<a href="https://arxiv.org/abs/1907.11692" target="_blank">Research Paper</a>, 
                    <a href="https://huggingface.co/sentence-transformers/all-distilroberta-v1" target="_blank">Model Card</a>).</li>
                <li><b>paraphrase-albert-small-v2:</b> Suitable for paraphrasing in resource-constrained environments. 
                    (<a href="https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2" target="_blank">Model Card</a>).</li>
                <li><b>multi-qa-distilbert-cos-v1:</b> Optimized for multilingual question-answering tasks. 
                    (<a href="https://huggingface.co/sentence-transformers/multi-qa-distilbert-cos-v1" target="_blank">Model Card</a>).</li>
                <li><b>distiluse-base-multilingual-cased-v2:</b> Trained on multilingual datasets for cross-lingual embeddings. 
                    (<a href="https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2" target="_blank">Model Card</a>).</li>
                <li><b>all-MiniLM-L12-v2:</b> Deeper MiniLM variant for enhanced contextual understanding. 
                    (<a href="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2" target="_blank">Model Card</a>).</li>
            </ul>
            <li><b>Metrics:</b>
                <ul>
                    <li><b>Average:</b> Mean similarity score for each job description.</li>
                    <li><b>Variance:</b> Variability in the similarity scores.</li>
                    <li><b>Median:</b> Middle value of the similarity scores.</li>
                    <li><b>Standard Deviation:</b> Spread of the similarity scores around the mean.</li>
                    <li><b>Certainty Score:</b> Indicates model agreement, with 1 being the highest consensus.</li>
                </ul>
            </li>
        </ul>
        <p>If you liked this application, feel free to send your feedback, suggestions, or adulations to 
        <b>[email protected]</b>.</p>
        """

        return reasoning, styled_df + description

    except Exception as e:
        return f"<b>Error during computation:</b> {str(e)}", None

###############################
#  APPROACH A EXPLANATION
###############################
def explain_scores_by_sentences(model, resume_text, job_text, top_k=3):
    """Return HTML listing the most similar (resume sentence, job sentence) pairs.

    Both texts are split into sentences, every sentence is embedded with
    ``model``, and all resume/job sentence pairs are ranked by cosine
    similarity.

    Args:
        model: Object whose ``encode`` method embeds a list of sentences.
        resume_text: Resume text.
        job_text: Text of a single job description.
        top_k: Maximum number of sentence pairs to report.

    Returns:
        An HTML fragment with the highest-scoring pairs, or a short HTML
        message when either text yields no sentences.
    """
    from nltk.tokenize import sent_tokenize

    resume_sents = sent_tokenize(resume_text)
    job_sents = sent_tokenize(job_text)

    if not resume_sents or not job_sents:
        return "<b>No sentences found in resume or job description.</b>"

    resume_embeddings = model.encode(resume_sents)
    job_embeddings = model.encode(job_sents)

    # sim_matrix[r, c] is the similarity of resume sentence r and job sentence c.
    sim_matrix = cosine_similarity(resume_embeddings, job_embeddings)

    # Rank the flattened scores from high to low, then map the flat positions
    # back to (row, col) using the matrix's own shape.
    top_flat = np.argsort(sim_matrix, axis=None)[::-1][:top_k]
    rows, cols = np.unravel_index(top_flat, sim_matrix.shape)

    parts = ["<h4>Top Similar Sentence Pairs</h4>"]
    for rank, (row, col) in enumerate(zip(rows, cols), start=1):
        score = sim_matrix[row, col]
        # The sentences are user-supplied text: escape them so characters such
        # as "<" or "&" are shown literally instead of being parsed as markup.
        resume_sentence = html.escape(resume_sents[row])
        job_sentence = html.escape(job_sents[col])
        parts.append(f"""
        <p><b>#{rank}:</b><br>
        <b>Resume:</b> {resume_sentence}<br>
        <b>Job:</b> {job_sentence}<br>
        <b>Similarity Score:</b> {score:.4f}</p>
        """)
    return "".join(parts)

def explain_model_scores(model_name, resume, job_list, selected_job_idx, top_k=3):
    """Build the sentence-level explanation for one model and one job.

    Args:
        model_name: Key into ``models`` selecting the embedding model.
        resume: Resume text.
        job_list: List of job-description strings.
        selected_job_idx: Zero-based position of the job to explain; it is
            converted with ``int`` before use.
        top_k: Number of sentence pairs requested from
            ``explain_scores_by_sentences``.

    Returns:
        A ``gr.update`` that makes the target component visible and sets its
        value to either the explanation HTML or an error message. Exceptions
        are reported in that message rather than raised.
    """
    def _visible(markup):
        # Every outcome is delivered the same way: set the value and reveal it.
        return gr.update(value=markup, visible=True)

    try:
        model = models[model_name]

        # Validate inputs
        if not job_list:
            return _visible("<b>Error:</b> No jobs added to analyze.")

        selected_job_idx = int(selected_job_idx)
        job_count = len(job_list)
        if not 0 <= selected_job_idx < job_count:
            return _visible(f"<b>Error:</b> Invalid job index {selected_job_idx}. You only have {job_count} jobs.")

        resume_text = resume.strip()
        job_text = job_list[selected_job_idx].strip()

        if not resume_text:
            return _visible("<b>No resume text provided.</b>")

        return _visible(explain_scores_by_sentences(model, resume_text, job_text, top_k))

    except Exception as e:
        return _visible(f"<b>Error in explanation:</b> {str(e)}")

###############################
#  GRADIO APP LOGIC
###############################

# --- Job List Management ---
def _render_job_list_html(jobs, max_chars=100):
    """Render the job list as an HTML bullet list of escaped, truncated snippets."""
    if not jobs:
        return "<i>No jobs added yet...</i>"
    items = []
    for number, job in enumerate(jobs, start=1):
        snippet = job[:max_chars] + "..." if len(job) > max_chars else job
        # Truncate first, then escape, so an HTML entity is never cut in half.
        items.append(f"<li><b>Job {number}:</b> {html.escape(snippet)}</li>")
    return "<ul>" + "".join(items) + "</ul>"


def add_job_to_list(current_job, job_list):
    """Append a job description to the list and rebuild its HTML preview.

    Args:
        current_job: Text from the single-job input box.
        job_list: Jobs added so far. It is not mutated; a new list is returned
            when a job is added.

    Returns:
        A ``(job_list, display_html, input_text)`` tuple. ``display_html`` is
        always rendered markup for the returned list, including when
        ``current_job`` is blank and nothing is added. ``input_text`` is
        always ``""`` so the input box is cleared.
    """
    if current_job.strip():
        updated_list = job_list + [current_job]
    else:
        # Nothing to add: keep the list, but still return HTML (not the raw
        # list) for the display component.
        updated_list = job_list

    return updated_list, _render_job_list_html(updated_list), ""

def clear_jobs():
    """Return the reset values: an empty job list, placeholder HTML, and empty input text."""
    emptied_jobs = []
    placeholder_html = "<i>No jobs added yet...</i>"
    cleared_input = ""
    return emptied_jobs, placeholder_html, cleared_input

# --- Main Processing ---
def _resolve_client_ip(request):
    """Best-effort caller IP: the request's client host, else this host's IP, else "Unknown IP"."""
    try:
        if request is not None and hasattr(request, 'client'):
            return request.client.host
        return socket.gethostbyname(socket.gethostname())
    except (OSError, AttributeError):
        # A failed hostname lookup or a request without client info must not
        # abort the match itself.
        return "Unknown IP"


def process_and_display(resume, job_list, request: "gr.Request" = None):
    """Gradio handler for the match button: log the input, then stream results.

    This is a generator. It first yields a "Processing..." placeholder and
    then one final 4-tuple of values for the processing indicator, the
    recommendation, the similarity table and the details button.

    Args:
        resume: Resume text.
        job_list: List of job-description strings.
        request: Injected by Gradio. Gradio supplies the request only to a
            parameter type-hinted as ``gr.Request``; the hint is quoted so it
            is not evaluated when the function is defined.

    Raises:
        Exception: Any error raised during processing is logged and re-raised.
    """
    user_ip = _resolve_client_ip(request)
    try:
        log_action(f"Process and display triggered for IP: {user_ip}", request)
        log_input_text(resume, job_list, user_ip=user_ip)

        yield gr.update(value="<b>Processing...</b>", visible=True), None, None, gr.update(visible=False)

        reasoning, table = compute_similarity(resume, job_list)

        if table:
            log_action(f"Successfully processed and displayed results for IP: {user_ip}", request)
            yield (
                gr.update(value="", visible=False),
                reasoning,
                table,
                gr.update(value="Papa Please Preach More", visible=True),
            )
        else:
            # compute_similarity reports failures through `reasoning` with no table.
            log_action(f"Error: No results to display for IP: {user_ip}", request)
            yield (
                gr.update(value="", visible=False),
                reasoning,
                "<p>No results to display.</p>",
                gr.update(visible=False),
            )
    except Exception as e:
        log_action(f"Error during process for IP {user_ip}: {str(e)}", request)
        # Bare raise keeps the original traceback intact.
        raise

def show_details(table):
    """Return an update that makes the component visible with ``table`` as its value."""
    reveal_table = gr.update(visible=True, value=table)
    return reveal_table

# Code a visitor must enter (compared after stripping whitespace) to reveal the main UI.
# NOTE(review): the code is hard-coded in source — consider reading it from a secret or
# environment variable.
INVITE_CODE = "INDIAMBA"
# State component that receives the True/False result of the invite check.
# NOTE(review): this gr.State is created before the `with gr.Blocks(...)` context below
# is opened — confirm Gradio still registers it when it is used as an event output.
access_granted = gr.State(False)

###############################
#  BUILD THE GRADIO INTERFACE
###############################
# Build the page. The custom CSS styles the box that lists the added jobs
# (the "job-display" class given to job_display_area below).
with gr.Blocks(css="""
    .job-display { border: 1px solid #ccc; padding: 10px; border-radius: 5px; background: #f9f9f9; min-height: 50px; }
""") as app:
    
    # State to hold the list of jobs
    job_list_state = gr.State([])

    gr.Markdown("# Second Opinion with Naval v1.2 – “Morning Daze”")
    gr.Markdown("Youtube Video: https://www.youtube.com/watch?v=khGulN2vAyY ")
    gr.Markdown("🔐 This app requires an invite code to continue. Ask Naval if you don't have one.")

    # Invite-code gate: the input and submit button are always shown.
    with gr.Row():
        code_input = gr.Textbox(label="Enter Invite Code", type="password", placeholder="Ask Naval for access code")
        access_button = gr.Button("Submit")

    # Created hidden; check_invite shows it when the entered code is wrong.
    access_warning = gr.Markdown(value="Access denied. Please enter the correct invite code.", visible=False)

    # Everything inside this group starts hidden and is revealed by check_invite.
    main_ui = gr.Group(visible=False)

    with main_ui:
        gr.Markdown("### ✍️ Input Section")
        
        with gr.Row():
            # Left Column: Resume
            with gr.Column(scale=1):
                resume_input = gr.Textbox(label="Paste Resume", lines=15, placeholder="Paste your resume here...")
            
            # Right Column: Dynamic Job Entry
            with gr.Column(scale=1):
                gr.Markdown("#### Add Job Descriptions")
                single_job_input = gr.Textbox(label="Paste ONE Job Description Here", lines=5, placeholder="Paste a single JD here and click 'Add Job' below.")
                
                with gr.Row():
                    add_job_btn = gr.Button("➕ Add Job", variant="secondary")
                    clear_jobs_btn = gr.Button("🗑️ Clear All Jobs", variant="stop")
                
                gr.Markdown("#### Your Added Jobs List:")
                job_display_area = gr.HTML(value="<i>No jobs added yet...</i>", elem_classes="job-display")

        gr.Markdown("---")

        gr.Markdown("### 🔍 Matching Section")
        with gr.Row():
            match_button = gr.Button("Match My Resume to Added Jobs", variant="primary")
            # Hidden status area; process_and_display shows "Processing..." here.
            processing_output = gr.HTML(value="", visible=False)

        with gr.Row():
            recommendation_output = gr.HTML(label="Recommendation", visible=True)
        with gr.Row():
            # Created hidden; the details button below makes it visible via show_details.
            table_output = gr.HTML(label="Similarity Table", visible=False)

        with gr.Row():
            # Created hidden; process_and_display reveals it once a table is available.
            nerd_button = gr.Button("Papa Please Preach More", visible=False)

        gr.Markdown("---")

        gr.Markdown("### 📊 Explanation Section")
        explanation_output = gr.HTML(label="Model Explanation", visible=False)

        # Event: Add Job
        add_job_btn.click(
            add_job_to_list,
            inputs=[single_job_input, job_list_state],
            outputs=[job_list_state, job_display_area, single_job_input] # Update state, display, and clear input
        )
        
        # Event: Clear Jobs
        clear_jobs_btn.click(
            clear_jobs,
            inputs=[],
            outputs=[job_list_state, job_display_area, single_job_input]
        )

        # Event: Match
        # process_and_display is a generator, so these outputs are updated once
        # per yielded tuple.
        match_button.click(
            process_and_display,
            inputs=[resume_input, job_list_state],
            outputs=[processing_output, recommendation_output, table_output, nerd_button]
        )

        # Event: reveal the similarity table, reusing the value it already holds.
        nerd_button.click(show_details, inputs=[table_output], outputs=[table_output])

        # This number is handed to explain_model_scores, which uses it directly
        # as a 0-based list position; the default 0 selects the first job.
        with gr.Row():
            job_index_to_explain = gr.Number(label="Job Index (1-based for humans, but logic uses 0-based)", value=0, precision=0)

        # One "Explain" button per loaded model.
        with gr.Row():
            for m_name in models.keys():
                btn = gr.Button(f"Explain {m_name}")
                # m=m_name binds the current name when the lambda is created; a
                # plain closure would only ever see the last name in the loop.
                btn.click(
                    fn=lambda resume, jobs, idx, m=m_name: explain_model_scores(m, resume, jobs, idx),
                    inputs=[resume_input, job_list_state, job_index_to_explain],
                    outputs=[explanation_output],
                )

    def check_invite(user_code):
        """Compare the entered code (whitespace stripped) with ``INVITE_CODE``.

        Returns a 3-tuple: whether access is granted, an update that shows the
        warning only on a wrong code, and an update that shows the main UI
        only on a correct code.
        """
        granted = user_code.strip() == INVITE_CODE
        return granted, gr.update(visible=not granted), gr.update(visible=granted)

    # Submitting the code runs check_invite; its three return values go to the
    # access flag, the warning message and the main UI group, in that order.
    access_button.click(fn=check_invite, inputs=[code_input], outputs=[access_granted, access_warning, main_ui])

# Start the app; launch() is called with no arguments.
app.launch()