singhn9 commited on
Commit
6b83926
·
verified ·
1 Parent(s): 53bc6a9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +874 -0
app.py ADDED
@@ -0,0 +1,874 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ import pandas as pd
5
+ import numpy as np
6
+ import os
7
+ from datetime import datetime
8
+ import socket
9
+ import nltk
10
+ #nltk.download("all")
11
+
12
# Tokenizer data needed by nltk.tokenize.sent_tokenize (used for the
# sentence-level explanations further down).
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource)

###############################
# LOGGING SETUP
###############################
log_file_path = os.path.expanduser("~/Second_Opinion_Logs.log")

# A brand-new log file starts with a header line describing the entry layout.
if not os.path.exists(log_file_path):
    with open(log_file_path, mode='w') as log_file:
        log_file.write("Timestamp (GMT) - IP: [IP Address] - [Action]\n")
23
+
24
def log_action(action, request=None):
    """
    Append one line describing `action` to the log file and echo it to stdout.

    Args:
        action: Free-text description of what happened.
        request: Optional request-like object. When it is truthy and has a
            `client` attribute, `request.client.host` is logged as the IP;
            otherwise the IP is resolved from this machine's hostname.

    Returns:
        None. Logging is best-effort: failures are printed, never raised.
    """
    # The file-level import only brings in `datetime`, so pull `timezone` here.
    from datetime import timezone

    try:
        user_ip = "Unknown IP"
        if request and hasattr(request, 'client'):
            user_ip = request.client.host
        else:
            user_ip = socket.gethostbyname(socket.gethostname())
        if user_ip in ("127.0.0.1", "::1"):
            user_ip = "Localhost (127.0.0.1)"
    except Exception:
        # Deliberately broad: a failed lookup must not break the caller.
        user_ip = "Unknown IP"

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
    # renders to the same text with this format string.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} (GMT) - IP: {user_ip} - {action}\n"

    try:
        # Explicit encoding so non-ASCII action text is written the same way
        # on every platform instead of depending on the locale default.
        with open(log_file_path, 'a', encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"Log entry added: {log_entry.strip()}")
    except Exception as e:
        print(f"Error logging action: {e}")
48
+
49
+ ###############################
50
+ # LOAD MODELS
51
+ ###############################
52
# Checkpoints compared side by side. Their order here is the order in which
# compute_similarity iterates the models.
_MODEL_NAMES = (
    "all-MiniLM-L6-v2",
    "paraphrase-MiniLM-L6-v2",
    "multi-qa-MiniLM-L6-cos-v1",
    "all-mpnet-base-v2",
    "paraphrase-mpnet-base-v2",
    "all-distilroberta-v1",
    "paraphrase-albert-small-v2",
    "multi-qa-distilbert-cos-v1",
    "distiluse-base-multilingual-cased-v2",
    "all-MiniLM-L12-v2",
)

# Every checkpoint is instantiated eagerly when this module is executed.
models = {name: SentenceTransformer(name) for name in _MODEL_NAMES}
64
+
65
+ ###############################
66
+ # MAIN SIMILARITY FUNCTION
67
+ ###############################
68
def compute_similarity(resume_text, job_descriptions):
    """
    Score the resume against every job description with every loaded model.

    Job descriptions are separated by a double line break. Segments that are
    blank (e.g. produced by extra blank lines or a trailing separator) are not
    scored, but the remaining jobs keep the number of their original position,
    so "Job N" always refers to the N-th segment of the raw input.

    Args:
        resume_text: The resume as plain text.
        job_descriptions: One or more job descriptions separated by "\\n\\n".

    Returns:
        A tuple (reasoning_html, table_html). On invalid input or on any
        failure the first element is an error message and the second is None.
    """
    try:
        if not resume_text.strip() or not job_descriptions.strip():
            return "<b>Error:</b> Resume and job descriptions cannot be empty.", None

        # str.split never returns an empty list, so test for usable segments
        # rather than for an empty result.
        numbered_jobs = [
            (position, text)
            for position, text in enumerate(job_descriptions.split("\n\n"), start=1)
            if text.strip()
        ]
        if not numbered_jobs:
            return "<b>Error:</b> Provide at least one job description separated by double line breaks.", None

        job_labels = [f"Job {position}" for position, _ in numbered_jobs]
        job_list = [text for _, text in numbered_jobs]

        # The document list is the same for every model, so build it once.
        documents = [resume_text] + job_list

        # One vector of per-job similarities per model.
        results = {}
        for model_name, model in models.items():
            embeddings = model.encode(documents)
            resume_embedding = embeddings[0]
            job_embeddings = embeddings[1:]
            results[model_name] = cosine_similarity([resume_embedding], job_embeddings).flatten()

        # Rows = models, columns = jobs.
        df = pd.DataFrame(results, index=job_labels).T

        # Per-job statistics across the models.
        variance = df.var(axis=0)
        metrics = {
            "Average": df.mean(axis=0),
            "Variance": variance,
            "Median": df.median(axis=0),
            "Standard Deviation": df.std(axis=0),
            "Certainty Score": 1 - (variance / variance.max()),
        }
        for metric_name, values in metrics.items():
            df.loc[metric_name] = values

        # The metric rows were appended last; split them off by count instead
        # of a hard-coded number so adding a metric cannot desynchronise this.
        metric_count = len(metrics)
        model_rows = df.iloc[:-metric_count]
        metrics_rows = df.iloc[-metric_count:]

        # Only the model rows get the colour gradient.
        styled_df = model_rows.style.background_gradient(cmap="Greens", axis=None).to_html()
        styled_df += metrics_rows.to_html(header=False)

        # Identify best job by highest average similarity
        best_job = metrics["Average"].idxmax()
        reasoning = f"<b>The best job match is {best_job} based on the highest average similarity score.</b>"

        # Static explanation appended below the table.
        description = """
<p><b>Explanation of the Table:</b></p>
<ul>
<li><b>Models:</b> Each row corresponds to a pre-trained model used for computing similarity. Below are details about each model:</li>
<ul>
<li><b>all-MiniLM-L6-v2:</b> Trained on NLI and STS datasets. Developed by Hugging Face and Microsoft. (<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>).</li>
<li><b>paraphrase-MiniLM-L6-v2:</b> Optimized for paraphrase detection on datasets like Quora Questions and MSRPC. (<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>).</li>
<li><b>multi-qa-MiniLM-L6-cos-v1:</b> Fine-tuned for question-answering tasks using datasets like SQuAD and Natural Questions.</li>
<li><b>all-mpnet-base-v2:</b> Robust embeddings for high-contextualized tasks. (<a href="https://arxiv.org/abs/2004.09297" target="_blank">Research Paper</a>).</li>
<li><b>paraphrase-mpnet-base-v2:</b> Reliable for paraphrase tasks, trained on diverse datasets.</li>
<li><b>all-distilroberta-v1:</b> A lightweight RoBERTa-based model for sentence embeddings. (<a href="https://arxiv.org/abs/1907.11692" target="_blank">Research Paper</a>).</li>
<li><b>paraphrase-albert-small-v2:</b> Suitable for paraphrasing in resource-constrained environments.</li>
<li><b>multi-qa-distilbert-cos-v1:</b> Optimized for multilingual question-answering tasks.</li>
<li><b>distiluse-base-multilingual-cased-v2:</b> Trained on multilingual datasets for cross-lingual embeddings.</li>
<li><b>all-MiniLM-L12-v2:</b> Deeper MiniLM variant for enhanced contextual understanding.</li>
</ul>
<li><b>Metrics:</b>
<ul>
<li><b>Average:</b> Mean similarity score for each job description.</li>
<li><b>Variance:</b> Variability in the similarity scores.</li>
<li><b>Median:</b> Middle value of the similarity scores.</li>
<li><b>Standard Deviation:</b> Spread of the similarity scores around the mean.</li>
<li><b>Certainty Score:</b> Indicates model agreement, with 1 being the highest consensus.</li>
</ul>
</li>
</ul>
<p>If you liked this application, feel free to send your feedback, suggestions, or adulations to <b>[email protected]</b>.</p>
"""

        return reasoning, styled_df + description

    except Exception as e:
        # UI boundary: report the failure as HTML instead of raising.
        return f"<b>Error during computation:</b> {str(e)}", None
153
+
154
+ ###############################
155
+ # APPROACH A EXPLANATION
156
+ ###############################
157
def explain_scores_by_sentences(model, resume_text, job_text, top_k=3):
    """
    Build HTML listing the most similar (resume sentence, job sentence) pairs.

    Both texts are split into sentences, every sentence is encoded with
    `model`, and the `top_k` highest cosine similarities are reported in
    descending order.

    Args:
        model: Object whose `encode(list_of_str)` returns one embedding per
            sentence (a SentenceTransformer in this app).
        resume_text: Resume as plain text.
        job_text: A single job description as plain text.
        top_k: Maximum number of sentence pairs to list.

    Returns:
        An HTML string. If either text yields no sentences, a short
        "no sentences found" message is returned instead.
    """
    from html import escape
    from nltk.tokenize import sent_tokenize

    resume_sents = sent_tokenize(resume_text)
    job_sents = sent_tokenize(job_text)

    if not resume_sents or not job_sents:
        return "<b>No sentences found in resume or job description.</b>"

    # Rows index resume sentences, columns index job sentences.
    sim_matrix = cosine_similarity(model.encode(resume_sents), model.encode(job_sents))

    # Rank all cells of the flattened matrix, best first, then map the flat
    # positions back to (row, col) using the matrix's own shape.
    best_flat = np.argsort(sim_matrix, axis=None)[::-1][:top_k]
    rows, cols = np.unravel_index(best_flat, sim_matrix.shape)

    parts = ["<h4>Top Similar Sentence Pairs</h4>"]
    for rank, (row, col) in enumerate(zip(rows, cols), start=1):
        score = sim_matrix[row, col]
        # The sentences are user-supplied text rendered as HTML, so characters
        # such as "<" or "&" must be escaped to display literally.
        parts.append(
            f"\n<p><b>#{rank}:</b><br>\n"
            f"<b>Resume:</b> {escape(resume_sents[row])}<br>\n"
            f"<b>Job:</b> {escape(job_sents[col])}<br>\n"
            f"<b>Similarity Score:</b> {score:.4f}</p>\n"
        )

    return "".join(parts)
197
+
198
def explain_model_scores(model_name, resume, jobs, selected_job_idx, top_k=3):
    """
    Produce the sentence-level explanation for one model and one job.

    Args:
        model_name: Key into the module-level `models` dict.
        resume: Resume text.
        jobs: Job descriptions separated by double line breaks.
        selected_job_idx: 0-based index of the job to explain. The UI passes
            the value of a number field, so it may arrive as a float or as
            None when the field is empty.
        top_k: Number of sentence pairs to show.

    Returns:
        A gr.update carrying the explanation (or an error message) as HTML
        and making the target component visible.
    """
    try:
        model = models[model_name]
        job_list = jobs.split("\n\n")

        # An empty number field yields None; report it as an invalid index
        # instead of surfacing a TypeError from the comparison below.
        if selected_job_idx is None:
            return gr.update(
                value="<b>Error:</b> Invalid job index (none provided).",
                visible=True
            )

        # Check valid job index
        if selected_job_idx < 0 or selected_job_idx >= len(job_list):
            return gr.update(
                # ":g" prints 5.0 as "5" while still showing e.g. 2.5 as-is.
                value=f"<b>Error:</b> Invalid job index {selected_job_idx:g}.",
                visible=True
            )

        # Convert once so indexing and the human-readable number agree.
        job_idx = int(selected_job_idx)

        resume_text = resume.strip()
        job_text = job_list[job_idx].strip()

        if not resume_text:
            return gr.update(value="<b>No resume text provided.</b>", visible=True)

        if not job_text:
            return gr.update(value=f"<b>Job description #{job_idx + 1} is empty.</b>", visible=True)

        explanation_html = explain_scores_by_sentences(model, resume_text, job_text, top_k)
        return gr.update(value=explanation_html, visible=True)

    except Exception as e:
        # UI boundary: show the failure in the explanation panel.
        return gr.update(value=f"<b>Error in explanation:</b> {str(e)}", visible=True)
229
+
230
+ ###############################
231
+ # GRADIO APP
232
+ ###############################
233
def process_and_display(resume, jobs, request: gr.Request = None):
    """
    Main callback: compute similarities, log each step, and stream UI updates.

    This is a generator. It first yields a "Processing..." state and then one
    final state, each a 4-tuple matching the outputs wired to the match
    button: (processing message, recommendation, table, details button).

    Args:
        resume: Resume text.
        jobs: Job descriptions separated by double line breaks.
        request: Request object for the current call, if any. The gr.Request
            type hint is what lets Gradio supply it; without the hint the
            parameter always kept its default. NOTE(review): whether a
            defaulted parameter is injected may depend on the Gradio/Python
            version; if not, behaviour is the same as before (None).

    Raises:
        Re-raises any exception after logging it.
    """
    user_ip = "Unknown IP"
    try:
        client = getattr(request, 'client', None) if request else None
        if client is not None:
            user_ip = client.host

        # Forward the request so the log line's own IP field describes the
        # caller rather than this machine.
        log_action(f"Process and display triggered for IP: {user_ip}", request)

        # Show a "processing" message first
        yield gr.update(value="<b>Processing...</b>", visible=True), None, None, gr.update(visible=False)

        log_action(f"Starting similarity computation for IP: {user_ip}", request)
        reasoning, table = compute_similarity(resume, jobs)

        if table:
            log_action(f"Successfully processed and displayed results for IP: {user_ip}", request)
            yield (
                gr.update(value="", visible=False),  # Clear the "processing" message
                reasoning,                           # Recommendation text
                table,                               # Table of similarities
                gr.update(value="Papa Please Preach More", visible=True),
            )
        else:
            # compute_similarity signals failure with a None table; the
            # reasoning slot then carries its error message.
            log_action(f"Error: No results to display for IP: {user_ip}", request)
            yield (
                gr.update(value="", visible=False),
                reasoning,
                "<p>No results to display.</p>",
                gr.update(visible=False),
            )
    except Exception as e:
        log_action(f"Error during process for IP {user_ip}: {str(e)}", request)
        # Bare raise keeps the original traceback intact.
        raise
270
+
271
def show_details(table):
    """
    Reveal the similarity table when the user asks for the details.

    Returns a gr.update that keeps `table` as the value and sets the
    component to visible.
    """
    reveal = gr.update(value=table, visible=True)
    return reveal
276
+
277
# NOTE(review): the invite code is hard-coded in source, and the gate below
# only toggles visibility of the main UI group.
INVITE_CODE = "INDIAMBA"

###############################
# BUILD THE GRADIO INTERFACE
###############################
with gr.Blocks() as app:
    # The state component is created inside the Blocks context so that it
    # belongs to this app; it is used as an output of the invite check below.
    access_granted = gr.State(False)

    gr.Markdown("# Second Opinion with Naval v1.1 – “Midnight Tears”")
    gr.Markdown("🔐 This app requires an invite code to continue. Ask Naval if you don't have one.")

    with gr.Row():
        code_input = gr.Textbox(label="Enter Invite Code", type="password", placeholder="Ask Naval for access code")
        access_button = gr.Button("Submit")

    access_warning = gr.Markdown(value="Access denied. Please enter the correct invite code.", visible=False)

    # Everything below stays hidden until the invite code is accepted.
    main_ui = gr.Group(visible=False)

    with main_ui:

        gr.Markdown(
            "This application matches a resume to job descriptions using SentenceTransformer models, "
            "provides similarity scores, and can explain which sentences contributed to each model's score."
        )

        with gr.Row():
            resume_input = gr.Textbox(label="Paste Resume", lines=5, placeholder="Paste your resume here...")
            job_input = gr.Textbox(label="Paste Job Descriptions", lines=5,
                                   placeholder="Paste one or more job descriptions here (double line break to separate).")

        with gr.Row():
            match_button = gr.Button("Match My Resume to Jobs")
            processing_output = gr.HTML(value="", visible=False)

        with gr.Row():
            recommendation_output = gr.HTML(label="Recommendation", visible=True)
        with gr.Row():
            table_output = gr.HTML(label="Similarity Table", visible=False)

        with gr.Row():
            nerd_button = gr.Button("Papa Please Preach More", visible=False)

        # "Explain" output component: hidden initially
        explanation_output = gr.HTML(label="Model Explanation", visible=False)

        # Main match button -> calls process_and_display
        match_button.click(
            process_and_display,
            inputs=[resume_input, job_input],
            outputs=[processing_output, recommendation_output, table_output, nerd_button]
        )

        # Button to show the full table
        nerd_button.click(
            show_details,
            inputs=[table_output],
            outputs=[table_output],
        )

        # Input for user to pick which job to explain
        with gr.Row():
            job_index_to_explain = gr.Number(label="Job Index (0-based)", value=0)

        # Buttons to explain each model's sentence-level similarity
        with gr.Row():
            for m_name in models:
                btn = gr.Button(f"Explain {m_name}")
                btn.click(
                    # m=m_name binds the current name at definition time;
                    # without it every button would use the last model.
                    fn=lambda resume, jobs, idx, m=m_name: explain_model_scores(m, resume, jobs, idx),
                    inputs=[resume_input, job_input, job_index_to_explain],
                    outputs=[explanation_output],
                )

    # --- INVITE CODE VERIFICATION FUNCTION ---
    def check_invite(user_code):
        """
        Compare the entered code with INVITE_CODE.

        Returns (granted flag, update for the warning, update for the main
        UI), in the order of the outputs wired to the submit button.
        """
        if user_code.strip() == INVITE_CODE:
            return True, gr.update(visible=False), gr.update(visible=True)
        else:
            return False, gr.update(visible=True), gr.update(visible=False)

    access_button.click(
        fn=check_invite,
        inputs=[code_input],
        outputs=[access_granted, access_warning, main_ui],
    )

# Optional: custom CSS
app.css = """
/* Make the entire background a gradient */
body {
    background: linear-gradient(120deg, #E0C3FC 0%, #8EC5FC 100%);
    margin: 0;
    padding: 0;
    font-family: 'Open Sans', sans-serif;
    min-height: 100vh; /* ensure full coverage of viewport */
}

/* Let the gradient show through behind the .gradio-container */
.gradio-container {
    background-color: transparent !important;
    color: #333333;
}

/* Your original style for centered recommendation text */
#centered-recommendation {
    text-align: center;
    font-size: 1.2em;
    margin-top: 20px;
    margin-bottom: 20px;
    color: #2c3e50; /* a nice dark teal */
}

/* Example button styling to match the gradient vibe */
button.primary, button.secondary {
    background-color: #3498db !important;
    border: 1px solid #2980b9 !important;
    color: #fff !important;
    border-radius: 4px !important;
}

/* Optional: style textboxes or other inputs for a cleaner look */
textarea, input[type='text'], input[type='number'] {
    background-color: #FFFFFF;
    color: #333;
    border-radius: 6px !important;
    border: 1px solid #ccc !important;
    padding: 8px !important;
}
/* (Optional) If you have an h1 or h2, you can style them too */
h1, h2, h3 {
    color: #2c3e50;
}
"""
app.launch(share=True)
411
+
412
+
413
+
414
+
415
+
416
+
417
+ import os
418
+ # import io
419
+ # import json
420
+ # import random
421
+ # import tempfile
422
+ # import smtplib
423
+ # from email.message import EmailMessage
424
+ # from datetime import datetime, timedelta, timezone
425
+ # from fastapi import FastAPI, UploadFile, Form, Request
426
+ # from fastapi.responses import JSONResponse
427
+ # from starlette.middleware.cors import CORSMiddleware
428
+ # from sentence_transformers import SentenceTransformer, util
429
+ # from PyPDF2 import PdfReader
430
+ # import gradio as gr
431
+ # import torch
432
+ # import pytz
433
+ # from dropbox_utils import upload_to_dropbox
434
+ # import asyncio
435
+ # os.environ["HF_HOME"] = "/app/cache"
436
+ # os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
437
+ # os.environ["HF_DATASETS_CACHE"] = "/app/cache"
438
+
439
+ # smtp_user = os.getenv("SMTP_USER")
440
+ # smtp_pass = os.getenv("SMTP_PASS")
441
+ # if not smtp_user or not smtp_pass:
442
+ # raise EnvironmentError("SMTP credentials are not set in environment variables.")
443
+
444
+
445
+ # # Setup model cache path
446
+ # # os.environ["TRANSFORMERS_CACHE"] = os.environ.get("TRANSFORMERS_CACHE", "/app/cache")
447
+ # # os.environ["HF_HOME"] = os.environ.get("HF_HOME", "/app/cache")
448
+
449
+ # # === Profile Save/Load ===
450
+ # PROFILE_DIR = os.path.join(os.getenv("HF_HOME", "/app/cache"), "user_profiles")
451
+ # os.makedirs(PROFILE_DIR, exist_ok=True)
452
+
453
+ # def test_writable_dirs():
454
+ # for path in ["/app/cache", PROFILE_DIR, "/tmp"]:
455
+ # print(f"🔍 Checking write permission for: {path}")
456
+ # if os.access(path, os.W_OK):
457
+ # print("✅ Writable")
458
+ # else:
459
+ # print("❌ Not writable")
460
+
461
+ # test_writable_dirs()
462
+ # # from huggingface_hub import login
463
+
464
+ # # # Load HF token and login
465
+ # hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
466
+ # # if hf_token:
467
+ # # login(token=hf_token, add_to_git_credential=False, write_permission=False)
468
+
469
+ # # === Load Model with CUDA if available and safe cache ===
470
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
471
+ # print(f"🚀 Loading SentenceTransformer on: {device}")
472
+
473
+
474
+
475
+
476
+ # # === Define Cohorts ===
477
+ # COHORTS = {
478
+ # "consulting": "Management consulting, strategy, analytics, client interaction",
479
+ # "bfsi": "Banking, finance, investment analysis, risk, fintech",
480
+ # "sales": "Sales, business development, GTM strategy, CRM, channel sales",
481
+ # "it": "Software development, cloud, AI/ML, backend systems",
482
+ # "hr": "Human resources, L&D, talent acquisition, HRBP",
483
+ # "legal": "Contracts, litigation, compliance, intellectual property",
484
+ # "scm": "Logistics, procurement, inventory, operations, manufacturing",
485
+ # "bpo": "Customer service, support, inbound/outbound calling, operations"
486
+ # }
487
+
488
+
489
+ # # === Helper to extract text from PDF ===
490
+ # def extract_text(file):
491
+ # reader = PdfReader(file)
492
+ # return "\n".join([page.extract_text() or "" for page in reader.pages])
493
+
494
+ # # === Gradio UI function for resume match ===
495
+ # def match_resume(resume_pdf, job_description):
496
+ # text = extract_text(resume_pdf)
497
+ # resume_emb = model.encode(text, convert_to_tensor=True)
498
+ # jd_emb = model.encode(job_description, convert_to_tensor=True)
499
+ # score = util.cos_sim(jd_emb, resume_emb).item() * 100
500
+ # label = "✅ Strong Match" if score > 70 else "⚠️ Needs Tailoring"
501
+ # return f"Match Score: {round(score, 2)}%\n\n{label}"
502
+
503
+ # demo = gr.Interface(
504
+ # fn=match_resume,
505
+ # inputs=[
506
+ # gr.File(label="Upload Resume PDF", file_types=[".pdf"]),
507
+ # gr.Textbox(label="Paste Job Description", lines=6)
508
+ # ],
509
+ # outputs="text",
510
+ # title="🧠 Resume to JD Matcher",
511
+ # description="Upload your resume PDF and paste a job description to get a similarity score and feedback!"
512
+ # )
513
+
514
+ # # === FastAPI App ===
515
+ # fastapi_app = FastAPI()
516
+ # model = None
517
+ # COHORT_EMBEDDINGS = {}
518
+ # # === Load model eagerly before app starts ===
519
+ # print("🕐 Preloading SentenceTransformer model before app declaration...")
520
+
521
+ # model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", use_auth_token=hf_token).to(device)
522
+ # COHORT_EMBEDDINGS = {
523
+ # k: model.encode(v, convert_to_tensor=True) for k, v in COHORTS.items()
524
+ # }
525
+
526
+ # print("✅ Model and cohort embeddings loaded.")
527
+
528
+ # fastapi_app.add_middleware(
529
+ # CORSMiddleware,
530
+ # allow_origins=["*"],
531
+ # allow_methods=["*"],
532
+ # allow_headers=["*"]
533
+ # )
534
+
535
+ # @fastapi_app.post("/debug")
536
+ # async def debug(request: Request):
537
+ # body = await request.body()
538
+ # return {"body": body.decode(), "headers": dict(request.headers)}
539
+
540
+
541
+
542
+ # @fastapi_app.get("/status")
543
+ # async def status():
544
+ # return {"model_loaded": model is not None}
545
+
546
+
547
+
548
+ # # === In-memory OTP store ===
549
+ # OTP_STORE = {}
550
+
551
+ # from pydantic import BaseModel
552
+
553
+ # class EmailRequest(BaseModel):
554
+ # email: str
555
+
556
+ # @fastapi_app.post("/send-otp")
557
+ # async def send_otp(request: EmailRequest):
558
+ # email = request.email
559
+ # otp = str(random.randint(100000, 999999))
560
+ # expiry = datetime.now() + timedelta(minutes=10)
561
+ # OTP_STORE[email] = (otp, expiry)
562
+
563
+ # msg = EmailMessage()
564
+ # msg["Subject"] = "Your ResumePilot Login OTP"
565
+ # msg["From"] = smtp_user
566
+ # msg["To"] = email
567
+ # msg.set_content(f"Your one-time password (OTP) is: {otp}. It will expire in 10 minutes.")
568
+
569
+ # try:
570
+ # with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
571
+ # smtp.login(smtp_user, smtp_pass)
572
+ # smtp.send_message(msg)
573
+ # return {"status": "sent"}
574
+ # except Exception as e:
575
+ # return JSONResponse({"status": "error", "error": str(e)}, status_code=500)
576
+
577
+ # from pydantic import BaseModel
578
+ # from fastapi.responses import JSONResponse
579
+ # from datetime import datetime
580
+ # import random
581
+
582
+ # class OTPVerifyRequest(BaseModel):
583
+ # email: str
584
+ # otp: str
585
+
586
+ # @fastapi_app.post("/verify-otp")
587
+ # async def verify_otp(request: OTPVerifyRequest):
588
+ # email = request.email
589
+ # otp = request.otp
590
+
591
+ # stored = OTP_STORE.get(email)
592
+ # if not stored:
593
+ # return JSONResponse({"error": "No OTP found"}, status_code=400)
594
+
595
+ # saved_otp, expiry = stored
596
+ # if datetime.now() > expiry:
597
+ # return JSONResponse({"error": "OTP expired"}, status_code=401)
598
+
599
+ # if otp != saved_otp:
600
+ # return JSONResponse({"error": "Invalid OTP"}, status_code=401)
601
+
602
+ # # OTP valid — return a token and email
603
+ # return {"token": f"token_{random.randint(100000, 999999)}", "email": email}
604
+
605
+
606
+ # from fastapi import Form
607
+ # from email.message import EmailMessage
608
+ # import smtplib
609
+ # import secrets
610
+ # import os
611
+
612
+ # MAGIC_TOKENS = {} # In-memory token storage
613
+
614
+ # @fastapi_app.post("/send_magic_link")
615
+ # async def send_magic_link(email: str = Form(...)):
616
+ # username = email.split("@")[0]
617
+ # token = secrets.token_urlsafe(16)
618
+ # MAGIC_TOKENS[username] = token
619
+
620
+ # link = f"https://tendermatcher.tech/campus/?token={token}&user={username}"
621
+
622
+
623
+ # msg = EmailMessage()
624
+ # msg["Subject"] = "🔓 Your Magic Login Link"
625
+ # msg["From"] = os.environ["GMAIL_USER"]
626
+ # msg["To"] = email
627
+ # msg.set_content(f"Hi {username},\n\nClick here to log in:\n{link}\n\nCheers,\nResumePilot")
628
+
629
+ # with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
630
+ # smtp.login(os.environ["GMAIL_USER"], os.environ["GMAIL_APP_PASSWORD"])
631
+ # smtp.send_message(msg)
632
+
633
+ # return {"status": "sent", "token": token}
634
+
635
+
636
+ # @fastapi_app.post("/verify_magic_token")
637
+ # async def verify_magic_token(user: str = Form(...), token: str = Form(...)):
638
+ # stored_token = MAGIC_TOKENS.get(user)
639
+ # if not stored_token:
640
+ # return JSONResponse({"error": "No token found for user"}, status_code=400)
641
+ # if token != stored_token:
642
+ # return JSONResponse({"error": "Invalid token"}, status_code=401)
643
+
644
+ # # ✅ Valid magic link
645
+ # return {"status": "verified", "user": user}
646
+
647
+
648
+
649
+
650
+ # # === Resume Matcher endpoint ===
651
+ # @fastapi_app.post("/predict")
652
+ # async def predict(file: UploadFile, jd: str = Form(...), email: str = Form(...)):
653
+ # try:
654
+ # content = await file.read()
655
+ # pdf = io.BytesIO(content)
656
+ # text = extract_text(pdf)
657
+ # resume_emb = model.encode(text, convert_to_tensor=True)
658
+ # jd_emb = model.encode(jd, convert_to_tensor=True)
659
+ # score = util.cos_sim(jd_emb, resume_emb).item() * 100
660
+ # label = "Strong Match" if score > 70 else "Needs Tailoring"
661
+
662
+ # with tempfile.TemporaryDirectory() as tmpdir:
663
+ # upload_to_dropbox(content, f"/spc_cohort_data/{email}/resume.pdf")
664
+ # jd_data = json.dumps({"score": round(score, 2), "feedback": label}).encode("utf-8")
665
+ # upload_to_dropbox(jd_data, f"/spc_cohort_data/{email}/jd_match.json")
666
+
667
+ # return JSONResponse({"score": round(score, 2), "feedback": label, "device": device})
668
+ # except Exception as e:
669
+ # return JSONResponse({"error": str(e)}, status_code=500)
670
+
671
+ # # === Cohort Predictor endpoint ===
672
+ # @fastapi_app.post("/cohort")
673
+ # async def cohort_predict(name: str = Form(...), email: str = Form(...), summary: str = Form(...), quiz: str = Form(...)):
674
+ # try:
675
+ # combined = f"{name}\n{email}\n{summary}"
676
+ # user_emb = model.encode(combined, convert_to_tensor=True)
677
+ # scores = {cohort: util.cos_sim(user_emb, emb).item() for cohort, emb in COHORT_EMBEDDINGS.items()}
678
+ # predicted = max(scores, key=scores.get)
679
+
680
+ # with tempfile.TemporaryDirectory() as tmpdir:
681
+ # quiz_bytes = json.dumps(json.loads(quiz)).encode("utf-8")
682
+ # upload_to_dropbox(quiz_bytes, f"/spc_cohort_data/{email}/quiz.json")
683
+
684
+ # cohort_result = {
685
+ # "predicted_cohort": predicted,
686
+ # "scores": {k: round(v * 100, 2) for k, v in scores.items()}
687
+ # }
688
+ # upload_to_dropbox(json.dumps(cohort_result).encode("utf-8"), f"/spc_cohort_data/{email}/cohort.json")
689
+
690
+ # return JSONResponse(cohort_result)
691
+ # except Exception as e:
692
+ # return JSONResponse({"error": str(e)}, status_code=500)
693
+
694
+
695
+ # from dropbox.exceptions import ApiError
696
+ # import dropbox
697
+
698
+ # DROPBOX_ACCESS_TOKEN = os.getenv("DROPBOX_REFRESH_TOKEN")
699
+ # dbx = dropbox.Dropbox(DROPBOX_ACCESS_TOKEN)
700
+ # @fastapi_app.post("/save_profile")
701
+ # async def save_profile(request: Request):
702
+ # form = await request.form()
703
+ # email = form.get("email")
704
+ # full_name = form.get("full_name")
705
+ # job_title = form.get("job_title")
706
+
707
+ # if not email or not full_name or not job_title:
708
+ # return JSONResponse({"error": "Missing profile fields"}, status_code=400)
709
+
710
+ # path = get_profile_path(email)
711
+
712
+ # try:
713
+ # try:
714
+ # _, res = dbx.files_download(path)
715
+ # profile_data = json.loads(res.content)
716
+ # except ApiError:
717
+ # profile_data = {}
718
+
719
+ # profile_data.update({
720
+ # "full_name": full_name,
721
+ # "job_title": job_title
722
+ # })
723
+
724
+ # upload_to_dropbox(
725
+ # json.dumps(profile_data).encode(),
726
+ # path)
727
+
728
+ # return JSONResponse({"success": True})
729
+ # except Exception as e:
730
+ # return JSONResponse({"error": str(e)}, status_code=500)
731
+
732
+ # @fastapi_app.get("/load_profile")
733
+ # async def load_profile(email: str):
734
+ # path = get_profile_path(email)
735
+
736
+ # try:
737
+ # _, res = dbx.files_download(path)
738
+ # profile_data = json.loads(res.content)
739
+ # return JSONResponse({
740
+ # "full_name": profile_data.get("full_name", ""),
741
+ # "job_title": profile_data.get("job_title", "")
742
+ # })
743
+ # except ApiError:
744
+ # return JSONResponse({"full_name": "", "job_title": ""})
745
+ # except Exception as e:
746
+ # return JSONResponse({"error": str(e)}, status_code=500)
747
+
748
+
749
+
750
+
751
+ # def get_profile_path(email: str):
752
+ # return f"/spc_cohort_data/{email}/profile.json"
753
+
754
+ # @fastapi_app.post("/save_theme")
755
+ # async def save_theme(request: Request):
756
+ # form = await request.form()
757
+ # email = form.get("email")
758
+ # theme = form.get("theme")
759
+
760
+ # if not email or not theme:
761
+ # return JSONResponse({"error": "Missing email or theme"}, status_code=400)
762
+
763
+ # path = get_profile_path(email)
764
+
765
+ # try:
766
+ # try:
767
+ # _, res = dbx.files_download(path)
768
+ # profile_data = json.loads(res.content)
769
+ # except ApiError:
770
+ # profile_data = {}
771
+
772
+ # profile_data["theme"] = theme
773
+
774
+
775
+ # upload_to_dropbox(
776
+ # json.dumps(profile_data).encode(),
777
+ # path
778
+ # )
779
+
780
+ # return JSONResponse({"success": True})
781
+ # except Exception as e:
782
+ # return JSONResponse({"error": str(e)}, status_code=500)
783
+
784
+ # @fastapi_app.get("/load_theme")
785
+ # async def load_theme(email: str):
786
+ # path = get_profile_path(email)
787
+
788
+ # try:
789
+ # _, res = dbx.files_download(path)
790
+ # profile_data = json.loads(res.content)
791
+ # return JSONResponse({"theme": profile_data.get("theme", "light")})
792
+ # except ApiError:
793
+ # return JSONResponse({"theme": "light"})
794
+ # except Exception as e:
795
+ # return JSONResponse({"error": str(e)}, status_code=500)
796
+
797
+
798
+
799
+
800
+ # # === Log Endpoint ===
801
+ # @fastapi_app.post("/log")
802
+ # async def receive_log(request: Request):
803
+ # try:
804
+ # payload = await request.json()
805
+ # timestamp = datetime.now(timezone.utc).astimezone(pytz.timezone("Asia/Kolkata")).isoformat()
806
+ # payload["logged_at"] = timestamp
807
+
808
+ # log_line = json.dumps(payload, ensure_ascii=False) + "\n"
809
+ # today = datetime.now().strftime("%Y-%m-%d")
810
+ # log_path = f"/spc_cohort_data/logs/{today}.jsonl"
811
+ # upload_to_dropbox(log_line.encode("utf-8"), log_path, append=True)
812
+
813
+ # return {"status": "logged"}
814
+ # except Exception as e:
815
+ # return JSONResponse({"error": str(e)}, status_code=500)
816
+
817
+ # # === List routes endpoint ===
818
+ # @fastapi_app.get("/routes")
819
+ # async def list_routes():
820
+ # return [
821
+ # {
822
+ # "path": route.path,
823
+ # "methods": list(route.methods),
824
+ # "name": route.name
825
+ # }
826
+ # for route in fastapi_app.routes
827
+ # if hasattr(route, "methods")
828
+ # ]
829
+
830
+ # @fastapi_app.get("/")
831
+ # async def root_redirect():
832
+ # return JSONResponse({"message": "Visit /ui for the resume matcher and API routes"})
833
+
834
+ # @fastapi_app.get("/health")
835
+ # async def health():
836
+ # return {"status": "ok"}
837
+
838
+
839
+ # # === Gradio UI for listing routes ===
840
+ # from fastapi.testclient import TestClient
841
+
842
+ # client = TestClient(fastapi_app)
843
+
844
+ # def get_routes_str():
845
+ # response = client.get("/routes")
846
+ # if response.status_code == 200:
847
+ # return json.dumps(response.json(), indent=2)
848
+ # else:
849
+ # return f"Error fetching routes: {response.status_code}"
850
+
851
+
852
+ # routes_demo = gr.Interface(
853
+ # fn=get_routes_str,
854
+ # inputs=[],
855
+ # outputs="textbox",
856
+ # title="API Routes",
857
+ # description="List of all API routes exposed by FastAPI backend"
858
+ # )
859
+
860
+ # with gr.Blocks() as ui:
861
+ # with gr.Tabs():
862
+ # with gr.TabItem("Resume Matcher"):
863
+ # demo.render()
864
+ # with gr.TabItem("API Routes"):
865
+ # routes_demo.render()
866
+
867
+
868
+ # # Mount the combined UI at /ui
869
+ # app = fastapi_app
870
+
871
+ # if __name__ == "__main__":
872
+ # import os
873
+ # import uvicorn
874
+ # ui.launch(server_name="0.0.0.0", server_port=7860)