File size: 18,491 Bytes
6b83926
 
 
 
 
 
 
 
 
4c2ef66
 
 
6b83926
bb83048
 
6b83926
a3f23f5
6b83926
 
 
413ecb7
4c2ef66
413ecb7
6b83926
507fccc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90d3792
6b83926
90d3792
bb83048
90d3792
 
 
6b83926
90d3792
a3f23f5
6b83926
 
90d3792
6b83926
bb83048
6b83926
 
 
 
 
4c2ef66
413ecb7
 
4c2ef66
 
 
413ecb7
 
 
4c2ef66
413ecb7
 
 
 
 
 
 
 
6b83926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90d3792
6b83926
4c2ef66
6b83926
4c2ef66
 
 
 
 
6b83926
 
4c2ef66
6b83926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77d1d59
bb83048
 
77d1d59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb83048
 
 
6b83926
 
 
 
90d3792
bb83048
90d3792
a3f23f5
bb83048
 
6b83926
 
 
 
 
 
 
 
90d3792
6b83926
 
 
 
 
 
 
 
 
90d3792
6b83926
 
 
 
 
 
 
 
 
 
4c2ef66
6b83926
 
4c2ef66
 
 
 
6b83926
4c2ef66
6b83926
4c2ef66
6b83926
 
4c2ef66
6b83926
 
 
4c2ef66
6b83926
 
 
 
 
 
 
4c2ef66
6b83926
4c2ef66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77d1d59
4c2ef66
 
 
6b83926
90d3792
bb83048
90d3792
ce73588
 
90d3792
6b83926
4c2ef66
6b83926
 
 
4c2ef66
6b83926
 
 
 
a3f23f5
 
 
bb83048
6b83926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3f23f5
 
 
90d3792
 
 
4c2ef66
 
 
 
 
 
 
2db4220
0e18f60
6b83926
 
 
 
 
 
90d3792
 
6b83926
 
 
bb83048
4c2ef66
6b83926
4c2ef66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a3f23f5
 
 
bb83048
6b83926
4c2ef66
6b83926
a3f23f5
6b83926
bb83048
 
 
 
6b83926
bb83048
a3f23f5
 
 
bb83048
 
a3f23f5
4c2ef66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb83048
 
4c2ef66
bb83048
 
a3f23f5
bb83048
a3f23f5
bb83048
4c2ef66
a3f23f5
6b83926
 
 
 
90d3792
4c2ef66
a3f23f5
6b83926
 
 
 
 
 
 
 
90d3792
a3f23f5
4c2ef66
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
import html
import os
import socket
import threading
import time
from datetime import datetime, timezone

import gradio as gr
import nltk
import numpy as np
import pandas as pd
from huggingface_hub import HfApi, HfFolder
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# For sentence tokenization: fetch the NLTK tokenizer data at import time.
# sent_tokenize (used in explain_scores_by_sentences) needs this data.
nltk.download('punkt')
# NOTE(review): presumably the resource name newer NLTK releases look for — confirm.
nltk.download("punkt_tab")

###############################
#  LOGGING SETUP
###############################
# All log entries are appended to a single file under ./logs (relative to the
# current working directory); the directory is created on import if missing.
log_dir = "./logs"
os.makedirs(log_dir, exist_ok=True)
log_file_path = os.path.join(log_dir, "Second_Opinion_Logs.log")

def upload_logs_to_hf():
    """Push the local log file to the Hugging Face Space repo (best effort).

    The token is taken from ``HfFolder.get_token()`` and, failing that, from
    the ``HUGGINGFACE_HUB_TOKEN`` environment variable. Without a token the
    upload is skipped. Any exception is caught and reported on stdout, so
    this function never raises.
    """
    try:
        hub = HfApi()

        hf_token = HfFolder.get_token()
        if not hf_token:
            hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")

        if hf_token:
            upload_kwargs = {
                "path_or_fileobj": log_file_path,
                "path_in_repo": "logs/Second_Opinion_Logs.log",
                "repo_id": "singhn9/privateSOWN",
                "repo_type": "space",
                "token": hf_token,
            }
            hub.upload_file(**upload_kwargs)
            print("Logs uploaded to Hugging Face repo.")
        else:
            print(" No HF token found โ€” skipping upload.")
    except Exception as exc:
        print(f" Log upload failed: {exc}")

def background_log_uploader(interval=300):
    """Loop forever: sleep ``interval`` seconds, then upload the log file if it exists.

    Never returns; intended to run on a background thread.
    """
    while True:
        time.sleep(interval)
        if not os.path.exists(log_file_path):
            # Nothing has been logged yet, so there is nothing to upload.
            continue
        upload_logs_to_hf()


# Start the uploader as soon as the module is imported. The thread is a daemon
# so it does not keep the process alive on shutdown.
threading.Thread(target=background_log_uploader, daemon=True).start()

def log_action(action, request=None):
    """Append a timestamped, IP-tagged line describing ``action`` to the log file.

    Args:
        action: Text describing what happened.
        request: Optional request object. When it exposes ``client``, the
            client's host is logged; otherwise the IP resolved from this
            machine's hostname is logged.

    Logging is best effort: IP resolution failures fall back to
    ``"Unknown IP"`` and write failures are printed instead of raised.
    """
    try:
        if request and hasattr(request, 'client'):
            user_ip = request.client.host
        else:
            user_ip = socket.gethostbyname(socket.gethostname())
    except Exception:
        # Deliberately broad: a failed lookup must never block logging.
        user_ip = "Unknown IP"

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted text is identical.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} (GMT) - IP: {user_ip} - {action}\n"

    try:
        # Explicit UTF-8 so non-ASCII text in ``action`` (e.g. exception
        # messages) cannot fail on hosts whose default encoding is not UTF-8.
        with open(log_file_path, 'a', encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"Log entry added: {log_entry.strip()}")
    except Exception as e:
        print(f"Error logging action: {e}")

def log_input_text(resume_text, job_list, user_ip="Unknown IP"):
    """Append the full resume and job description inputs to the log file.

    Args:
        resume_text: Resume text as entered by the user.
        job_list: Job descriptions; a list is joined with ``---`` separator
            lines, anything else is converted with ``str()``.
        user_ip: IP address recorded in the entry header.

    Best effort: any failure is printed instead of raised.
    """
    try:
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted text is identical.
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
        # specific handling for list or string
        jobs_str = "\n---\n".join(job_list) if isinstance(job_list, list) else str(job_list)

        log_entry = (
            f"{timestamp} (GMT) - IP: {user_ip}\n"
            f"--- Resume Input ---\n{resume_text}\n\n"
            f"--- Job Descriptions Input ---\n{jobs_str}\n"
            "---------------------------------------------\n"
        )
        # Resumes and job descriptions are arbitrary user text; write them as
        # UTF-8 so non-ASCII characters do not depend on the host's default
        # encoding.
        with open(log_file_path, 'a', encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"โœ… Logged full user input at {timestamp} from {user_ip}")
    except Exception as e:
        print(f"Error logging input: {e}")

###############################
#  LOAD MODELS
###############################
# Identifiers of the sentence-embedding models to compare, in display order.
_MODEL_NAMES = (
    "all-MiniLM-L6-v2",
    "paraphrase-MiniLM-L6-v2",
    "multi-qa-MiniLM-L6-cos-v1",
    "all-mpnet-base-v2",
    "paraphrase-mpnet-base-v2",
    "all-distilroberta-v1",
    "paraphrase-albert-small-v2",
    "multi-qa-distilbert-cos-v1",
    "distiluse-base-multilingual-cased-v2",
    "all-MiniLM-L12-v2",
)

# Every model is instantiated once at import time and keyed by its identifier.
models = {name: SentenceTransformer(name) for name in _MODEL_NAMES}

###############################
#  MAIN SIMILARITY FUNCTION
###############################
def compute_similarity(resume_text, job_list):
    """Score a resume against each job description with every loaded model.

    Args:
        resume_text: Resume text; must contain non-whitespace characters.
        job_list: List of job description strings; blank entries are ignored.

    Returns:
        A ``(reasoning_html, table_html)`` tuple. ``table_html`` holds the
        per-model similarity table, the summary metric rows and a static
        explanation. On invalid input or on any exception the first element
        is an error message and the second is ``None``.
    """
    try:
        if not resume_text.strip():
            return "<b>Error:</b> Resume cannot be empty.", None

        # Filter out empty strings from the list
        job_list = [j for j in job_list if j.strip()]

        if not job_list:
            return "<b>Error:</b> Please add at least one job description.", None

        # The documents are identical for every model, so build the list once.
        documents = [resume_text] + job_list

        results = {}
        for model_name, model in models.items():
            embeddings = model.encode(documents)
            # Row 0 is the resume; the remaining rows are the jobs, in order.
            resume_embedding = embeddings[0]
            job_embeddings = embeddings[1:]
            similarities = cosine_similarity([resume_embedding], job_embeddings).flatten()
            results[model_name] = similarities

        # After the transpose: one row per model, one column per job.
        df = pd.DataFrame(results, index=[f"Job {i+1}" for i in range(len(job_list))]).T

        variance = df.var(axis=0)
        max_variance = variance.max()
        if max_variance == 0:
            # All models agree exactly on every job; avoid 0/0 -> NaN and
            # report full consensus instead.
            certainty = pd.Series(1.0, index=variance.index)
        else:
            certainty = 1 - (variance / max_variance)

        # All metrics are computed from the model rows only, before any
        # metric row is appended to the frame.
        metrics = {
            "Average": df.mean(axis=0),
            "Variance": variance,
            "Median": df.median(axis=0),
            "Standard Deviation": df.std(axis=0),
            "Certainty Score": certainty,
        }
        for metric_name, values in metrics.items():
            df.loc[metric_name] = values

        # The metric rows are the trailing rows just appended; derive the
        # split point from the dict so it stays correct if metrics change.
        n_metrics = len(metrics)
        model_rows = df.iloc[:-n_metrics]
        metrics_rows = df.iloc[-n_metrics:]

        # Only the model rows get the colour gradient; metric rows are plain.
        styled_df = model_rows.style.background_gradient(cmap="Greens", axis=None).to_html()
        styled_df += metrics_rows.to_html(header=False)

        best_job = metrics["Average"].idxmax()
        reasoning = f"<b>The best job match is {best_job} based on the highest average similarity score.</b>"

        # Static explanation appended below the table.
        description = """
        <p><b>Explanation of the Table:</b></p>
        <ul>
            <li><b>Models:</b> Each row corresponds to a pre-trained model used for computing similarity. Below are details about each model:</li>
            <ul>
                <li><b>all-MiniLM-L6-v2:</b> Trained on NLI and STS datasets. Developed by Hugging Face and Microsoft. 
                    (<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>, 
                    <a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
                <li><b>paraphrase-MiniLM-L6-v2:</b> Optimized for paraphrase detection on datasets like Quora Questions and MSRPC. 
                    (<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>, 
                    <a href="https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
                <li><b>multi-qa-MiniLM-L6-cos-v1:</b> Fine-tuned for question-answering tasks using datasets like SQuAD and Natural Questions. 
                    (<a href="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1" target="_blank">Model Card</a>).</li>
                <li><b>all-mpnet-base-v2:</b> Robust embeddings for high-contextualized tasks. 
                    (<a href="https://arxiv.org/abs/2004.09297" target="_blank">Research Paper</a>, 
                    <a href="https://huggingface.co/sentence-transformers/all-mpnet-base-v2" target="_blank">Model Card</a>).</li>
                <li><b>paraphrase-mpnet-base-v2:</b> Reliable for paraphrase tasks, trained on diverse datasets. 
                    (<a href="https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2" target="_blank">Model Card</a>).</li>
                <li><b>all-distilroberta-v1:</b> A lightweight RoBERTa-based model for sentence embeddings. 
                    (<a href="https://arxiv.org/abs/1907.11692" target="_blank">Research Paper</a>, 
                    <a href="https://huggingface.co/sentence-transformers/all-distilroberta-v1" target="_blank">Model Card</a>).</li>
                <li><b>paraphrase-albert-small-v2:</b> Suitable for paraphrasing in resource-constrained environments. 
                    (<a href="https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2" target="_blank">Model Card</a>).</li>
                <li><b>multi-qa-distilbert-cos-v1:</b> Optimized for multilingual question-answering tasks. 
                    (<a href="https://huggingface.co/sentence-transformers/multi-qa-distilbert-cos-v1" target="_blank">Model Card</a>).</li>
                <li><b>distiluse-base-multilingual-cased-v2:</b> Trained on multilingual datasets for cross-lingual embeddings. 
                    (<a href="https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2" target="_blank">Model Card</a>).</li>
                <li><b>all-MiniLM-L12-v2:</b> Deeper MiniLM variant for enhanced contextual understanding. 
                    (<a href="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2" target="_blank">Model Card</a>).</li>
            </ul>
            <li><b>Metrics:</b>
                <ul>
                    <li><b>Average:</b> Mean similarity score for each job description.</li>
                    <li><b>Variance:</b> Variability in the similarity scores.</li>
                    <li><b>Median:</b> Middle value of the similarity scores.</li>
                    <li><b>Standard Deviation:</b> Spread of the similarity scores around the mean.</li>
                    <li><b>Certainty Score:</b> Indicates model agreement, with 1 being the highest consensus.</li>
                </ul>
            </li>
        </ul>
        <p>If you liked this application, feel free to send your feedback, suggestions, or adulations to 
        <b>[email protected]</b>.</p>
        """

        return reasoning, styled_df + description

    except Exception as e:
        # Boundary for the UI: surface the failure as an error message.
        return f"<b>Error during computation:</b> {str(e)}", None

###############################
#  APPROACH A EXPLANATION
###############################
def explain_scores_by_sentences(model, resume_text, job_text, top_k=3):
    """Return HTML listing the most similar resume/job sentence pairs.

    Both texts are split into sentences, every sentence is embedded with
    ``model.encode`` and all resume/job sentence pairs are ranked by cosine
    similarity.

    Args:
        model: Object exposing ``encode(list_of_sentences)``.
        resume_text: Resume text.
        job_text: Job description text.
        top_k: Maximum number of sentence pairs to report.

    Returns:
        An HTML string with up to ``top_k`` pairs, best first, or a short
        message when either text yields no sentences.
    """
    from nltk.tokenize import sent_tokenize

    resume_sents = sent_tokenize(resume_text)
    job_sents = sent_tokenize(job_text)

    if not resume_sents or not job_sents:
        return "<b>No sentences found in resume or job description.</b>"

    resume_embeddings = model.encode(resume_sents)
    job_embeddings = model.encode(job_sents)

    # sim_matrix[r, c] is the similarity of resume sentence r and job sentence c.
    sim_matrix = cosine_similarity(resume_embeddings, job_embeddings)
    flat_sim = sim_matrix.flatten()
    # argsort is ascending; reverse it so the best pairs come first.
    top_k_indices = np.argsort(flat_sim)[::-1][:top_k]

    n_cols = sim_matrix.shape[1]
    parts = ["<h4>Top Similar Sentence Pairs</h4>"]
    for rank, idx in enumerate(top_k_indices, start=1):
        # Undo the row-major flattening: flat index = row * n_cols + col.
        row, col = divmod(idx, n_cols)
        score = sim_matrix[row, col]

        # The sentences are user-supplied text rendered inside an HTML
        # component, so escape them to keep "<", ">" and "&" literal.
        resume_sentence = html.escape(resume_sents[row])
        job_sentence = html.escape(job_sents[col])
        parts.append(f"""
        <p><b>#{rank}:</b><br>
        <b>Resume:</b> {resume_sentence}<br>
        <b>Job:</b> {job_sentence}<br>
        <b>Similarity Score:</b> {score:.4f}</p>
        """)
    return "".join(parts)

def explain_model_scores(model_name, resume, job_list, selected_job_idx, top_k=3):
    """Build the sentence-level explanation for one model and one job.

    ``selected_job_idx`` is a 0-based position in ``job_list``. Every outcome,
    including validation failures and unexpected exceptions, is returned as a
    Gradio update that makes the target component visible with the HTML.
    """
    def _visible(markup):
        # All return paths show the explanation component with new content.
        return gr.update(value=markup, visible=True)

    try:
        chosen_model = models[model_name]

        # Validate inputs
        if not job_list:
            return _visible("<b>Error:</b> No jobs added to analyze.")

        selected_job_idx = int(selected_job_idx)
        if not 0 <= selected_job_idx < len(job_list):
            return _visible(f"<b>Error:</b> Invalid job index {selected_job_idx}. You only have {len(job_list)} jobs.")

        resume_text = resume.strip()
        job_text = job_list[selected_job_idx].strip()

        if not resume_text:
            return _visible("<b>No resume text provided.</b>")

        return _visible(explain_scores_by_sentences(chosen_model, resume_text, job_text, top_k))

    except Exception as exc:
        return _visible(f"<b>Error in explanation:</b> {str(exc)}")

###############################
#  GRADIO APP LOGIC
###############################

# --- Job List Management ---
def _render_job_list(jobs):
    """Return the HTML summary of ``jobs`` shown in the job display area."""
    if not jobs:
        return "<i>No jobs added yet...</i>"

    items = []
    for number, job in enumerate(jobs, start=1):
        # Show at most the first 100 characters of each description.
        snippet = job[:100] + "..." if len(job) > 100 else job
        # Job text is user input rendered as HTML; escape it so markup
        # characters are displayed literally.
        items.append(f"<li><b>Job {number}:</b> {html.escape(snippet)}</li>")
    return "<ul>" + "".join(items) + "</ul>"


def add_job_to_list(current_job, job_list):
    """Append ``current_job`` to the job list and rebuild its HTML summary.

    Args:
        current_job: Text of the job description being added.
        job_list: Current list of job descriptions (not mutated).

    Returns:
        ``(updated_list, display_html, "")``; the empty string clears the
        input box. When ``current_job`` is blank the list is returned
        unchanged, together with the HTML for the existing jobs.
    """
    if not current_job.strip():
        # The second value feeds an HTML component, so it must be rendered
        # HTML rather than the raw list.
        return job_list, _render_job_list(job_list), ""

    updated_list = job_list + [current_job]
    return updated_list, _render_job_list(updated_list), ""  # Clear the input box

def clear_jobs():
    """Return the reset values: empty job list, placeholder HTML, empty input text."""
    no_jobs = []
    placeholder_html = "<i>No jobs added yet...</i>"
    blank_input = ""
    return no_jobs, placeholder_html, blank_input

# --- Main Processing ---
def process_and_display(resume, job_list, request: "gr.Request" = None):
    """Log the submission, compute similarities and stream UI updates.

    Generator used as a Gradio event handler. It first yields a
    "Processing..." state, then a final 4-tuple of values for the processing
    banner, the recommendation, the table and the details button.

    Args:
        resume: Resume text.
        job_list: List of job description strings.
        request: Request for the current event. The ``gr.Request`` annotation
            is what makes Gradio pass it in; without the annotation the
            parameter always stays ``None``. It is written as a string so the
            annotation is resolved lazily.

    Raises:
        Exception: Any error raised while processing is logged and re-raised.
    """
    user_ip = "Unknown IP"
    try:
        try:
            if request and hasattr(request, 'client'):
                user_ip = request.client.host
            else:
                user_ip = socket.gethostbyname(socket.gethostname())
        except Exception:
            # Best effort, as in log_action: a failed lookup must not abort
            # the actual computation.
            user_ip = "Unknown IP"

        log_action(f"Process and display triggered for IP: {user_ip}", request)
        log_input_text(resume, job_list, user_ip=user_ip)

        yield gr.update(value="<b>Processing...</b>", visible=True), None, None, gr.update(visible=False)

        reasoning, table = compute_similarity(resume, job_list)

        if table:
            log_action(f"Successfully processed and displayed results for IP: {user_ip}", request)
            yield (
                gr.update(value="", visible=False),
                reasoning,
                table,
                gr.update(value="Papa Please Preach More", visible=True),
            )
        else:
            # compute_similarity returns None for the table on any error;
            # ``reasoning`` then carries the error message.
            log_action(f"Error: No results to display for IP: {user_ip}", request)
            yield (
                gr.update(value="", visible=False),
                reasoning,
                "<p>No results to display.</p>",
                gr.update(visible=False),
            )
    except Exception as e:
        log_action(f"Error during process for IP {user_ip}: {str(e)}", request)
        # Bare raise keeps the original traceback.
        raise

def show_details(table):
    """Return an update that makes the target component visible with ``table`` as its value."""
    reveal_update = gr.update(visible=True, value=table)
    return reveal_update

# Code a visitor must enter before the main UI is revealed.
# NOTE(review): this is a secret hard-coded in source — consider loading it
# from an environment variable instead.
INVITE_CODE = "INDIAMBA"

###############################
#  BUILD THE GRADIO INTERFACE
###############################
with gr.Blocks(css="""
    .job-display { border: 1px solid #ccc; padding: 10px; border-radius: 5px; background: #f9f9f9; min-height: 50px; }
""") as app:

    # Flag recording whether the invite code was accepted. It is created
    # inside the Blocks context (it used to be created before it) so that it
    # belongs to this app like every other component; it is an output of the
    # invite-check event at the bottom of this block.
    access_granted = gr.State(False)

    # State to hold the list of jobs
    job_list_state = gr.State([])

    gr.Markdown("# Second Opinion with Naval v1.2 โ€“ โ€œMorning Dazeโ€")
    gr.Markdown("Youtube Video: https://www.youtube.com/watch?v=khGulN2vAyY ")
    gr.Markdown("๐Ÿ” This app requires an invite code to continue. Ask Naval if you don't have one.")

    with gr.Row():
        code_input = gr.Textbox(label="Enter Invite Code", type="password", placeholder="Ask Naval for access code")
        access_button = gr.Button("Submit")

    access_warning = gr.Markdown(value="Access denied. Please enter the correct invite code.", visible=False)

    # Everything below lives in this group, which stays hidden until
    # check_invite accepts the code.
    main_ui = gr.Group(visible=False)

    with main_ui:
        gr.Markdown("### โœ๏ธ Input Section")

        with gr.Row():
            # Left Column: Resume
            with gr.Column(scale=1):
                resume_input = gr.Textbox(label="Paste Resume", lines=15, placeholder="Paste your resume here...")

            # Right Column: Dynamic Job Entry
            with gr.Column(scale=1):
                gr.Markdown("#### Add Job Descriptions")
                single_job_input = gr.Textbox(label="Paste ONE Job Description Here", lines=5, placeholder="Paste a single JD here and click 'Add Job' below.")

                with gr.Row():
                    add_job_btn = gr.Button("โž• Add Job", variant="secondary")
                    clear_jobs_btn = gr.Button("๐Ÿ—‘๏ธ Clear All Jobs", variant="stop")

                gr.Markdown("#### Your Added Jobs List:")
                job_display_area = gr.HTML(value="<i>No jobs added yet...</i>", elem_classes="job-display")

        gr.Markdown("---")

        gr.Markdown("### ๐Ÿ” Matching Section")
        with gr.Row():
            match_button = gr.Button("Match My Resume to Added Jobs", variant="primary")
            processing_output = gr.HTML(value="", visible=False)

        with gr.Row():
            recommendation_output = gr.HTML(label="Recommendation", visible=True)
        with gr.Row():
            # Hidden until the details button is clicked (see show_details).
            table_output = gr.HTML(label="Similarity Table", visible=False)

        with gr.Row():
            nerd_button = gr.Button("Papa Please Preach More", visible=False)

        gr.Markdown("---")

        gr.Markdown("### ๐Ÿ“Š Explanation Section")
        explanation_output = gr.HTML(label="Model Explanation", visible=False)

        # Event: Add Job
        add_job_btn.click(
            add_job_to_list,
            inputs=[single_job_input, job_list_state],
            outputs=[job_list_state, job_display_area, single_job_input] # Update state, display, and clear input
        )

        # Event: Clear Jobs
        clear_jobs_btn.click(
            clear_jobs,
            inputs=[],
            outputs=[job_list_state, job_display_area, single_job_input]
        )

        # Event: Match
        match_button.click(
            process_and_display,
            inputs=[resume_input, job_list_state],
            outputs=[processing_output, recommendation_output, table_output, nerd_button]
        )

        # Event: reveal the similarity table that the match step filled in.
        nerd_button.click(show_details, inputs=[table_output], outputs=[table_output])

        with gr.Row():
            # The value is passed unchanged to explain_model_scores, which
            # treats it as a 0-based index into the job list.
            job_index_to_explain = gr.Number(label="Job Index (1-based for humans, but logic uses 0-based)", value=0, precision=0)

        with gr.Row():
            # One "Explain" button per loaded model.
            for m_name in models.keys():
                btn = gr.Button(f"Explain {m_name}")
                btn.click(
                    # m=m_name binds the current name at definition time so
                    # each button explains its own model.
                    fn=lambda resume, jobs, idx, m=m_name: explain_model_scores(m, resume, jobs, idx),
                    inputs=[resume_input, job_list_state, job_index_to_explain],
                    outputs=[explanation_output],
                )

    def check_invite(user_code):
        """Compare the entered code with INVITE_CODE.

        Returns the access flag, an update for the warning message and an
        update for the main UI group, in that order.
        """
        if user_code.strip() == INVITE_CODE:
            return True, gr.update(visible=False), gr.update(visible=True)
        else:
            return False, gr.update(visible=True), gr.update(visible=False)

    access_button.click(fn=check_invite, inputs=[code_input], outputs=[access_granted, access_warning, main_ui])

app.launch()