"""Skill extraction from CV text using LLM.""" from __future__ import annotations from typing import Any, Dict, List from LLM.llm_models import cv_analyzer_model # Using CV-specific model (same as orchestrator) import json import re def _print_terminal_log(action: str, details: str = ""): """Print formatted log to terminal.""" timestamp = __import__('datetime').datetime.now().strftime("%H:%M:%S") if details: print(f"[{timestamp}] [CV ANALYZER] {action} :: {details}") else: print(f"[{timestamp}] [CV ANALYZER] {action}") def extract_skills_from_cv_text(cv_text: str, log_callback=None) -> Dict[str, Any]: """ Extract skills and relevant information from CV text using LLM. Args: cv_text: The extracted text content from a CV Returns: Dictionary containing extracted skills and candidate information """ prompt = f"""Analyze the following CV/Resume text and extract ALL relevant information in a structured format. CV TEXT: {cv_text} Please extract and organize the following information: 1. TECHNICAL SKILLS: Programming languages, frameworks, tools, technologies 2. SOFT SKILLS: Communication, leadership, teamwork, problem-solving, etc. 3. PROFESSIONAL EXPERIENCE: Years of experience, job titles, companies 4. EDUCATION: Degrees, certifications, institutions 5. DOMAIN EXPERTISE: Industries, specific domains (e.g., Finance, Healthcare, AI/ML) Return your analysis in the following JSON-like structure: {{ "technical_skills": ["skill1", "skill2", ...], "soft_skills": ["skill1", "skill2", ...], "experience_years": , "recent_roles": ["role1", "role2", ...], "education": ["degree1", "degree2", ...], "certifications": ["cert1", "cert2", ...], "domain_expertise": ["domain1", "domain2", ...], "summary": "A brief 2-3 sentence summary of the candidate's profile" }} Be thorough and extract as many relevant skills as possible. If information is not available, use empty arrays or "unknown".""" try: _print_terminal_log("Starting AI skill extraction from CV text") if log_callback: log_callback("AI Skill Extraction", {"status": "Initializing LLM model..."}) # Use the CV analyzer model (same provider as orchestrator - HF/Gemini) model = cv_analyzer_model _print_terminal_log("LLM Initialized", f"Model ready, CV length: {len(cv_text)} chars") if log_callback: log_callback("AI Analysis", {"status": "Sending CV to AI for analysis", "cv_length": len(cv_text)}) messages = [ { "role": "system", "content": "You are an expert HR analyst specializing in CV/Resume analysis and skill extraction. Extract information accurately and comprehensively." }, { "role": "user", "content": prompt } ] if log_callback: log_callback("LLM Request", {"message_count": len(messages), "model": "cv_analyzer_model"}) _print_terminal_log("Sending request to AI", "Waiting for skill extraction...") response = model.generate(messages=messages) # Handle ChatMessage object - convert to string if hasattr(response, 'content'): response_text = response.content else: response_text = str(response) _print_terminal_log("AI Response Received", f"Response length: {len(response_text) if response_text else 0} chars") if log_callback: log_callback("AI Response Received", {"response_length": len(response_text) if response_text else 0}) # Extract JSON from response (handle markdown code blocks) if log_callback: log_callback("Parsing AI Response", {"status": "Extracting structured data from AI response"}) json_match = re.search(r'\{[\s\S]*\}', response_text) if json_match: if log_callback: log_callback("JSON Extraction", {"status": "Found JSON in response, parsing..."}) _print_terminal_log("Parsing JSON response", "Extracting structured skill data...") skills_data = json.loads(json_match.group()) tech_count = len(skills_data.get("technical_skills", [])) soft_count = len(skills_data.get("soft_skills", [])) _print_terminal_log("Skills Extracted Successfully", f"Technical: {tech_count}, Soft: {soft_count}, Total: {tech_count + soft_count}") if log_callback: log_callback("Skills Parsed Successfully", { "technical_skills": tech_count, "soft_skills": soft_count, "total_skills": tech_count + soft_count }) else: if log_callback: log_callback("JSON Extraction Failed", {"status": "No JSON found, using fallback structure"}) # Fallback: return a basic structure with the raw response skills_data = { "technical_skills": [], "soft_skills": [], "experience_years": "unknown", "recent_roles": [], "education": [], "certifications": [], "domain_expertise": [], "summary": response_text[:500] # First 500 chars } _print_terminal_log("✅ CV Analysis Complete", "All skills successfully extracted and structured") if log_callback: log_callback("✅ Extraction Complete", {"status": "CV processing finished successfully"}) return skills_data except Exception as e: error_msg = str(e) _print_terminal_log(f"❌ ERROR: {type(e).__name__}", error_msg) if log_callback: log_callback("❌ AI Extraction Error", {"error": error_msg, "type": type(e).__name__}) # Return error information return { "error": error_msg, "technical_skills": [], "soft_skills": [], "experience_years": "unknown", "recent_roles": [], "education": [], "certifications": [], "domain_expertise": [], "summary": f"Failed to extract skills: {error_msg}" } def format_skills_for_display(skills_data: Dict[str, Any]) -> str: """ Format extracted skills data into HTML for display in Gradio. Args: skills_data: Dictionary containing extracted skills Returns: HTML string for display """ if "error" in skills_data: return f"""

⚠️ Error Extracting Skills

{skills_data.get('summary', 'Unknown error')}

""" html_parts = [ '

', '

', '📄 CV ANALYSIS COMPLETE', '

', ] # Summary if skills_data.get("summary"): html_parts.append(f'

') html_parts.append(f'

{skills_data["summary"]}

') html_parts.append('

') # Technical Skills if skills_data.get("technical_skills"): html_parts.append('

') html_parts.append('

💻 TECHNICAL SKILLS

') html_parts.append('

') for skill in skills_data["technical_skills"]: html_parts.append( f'{skill}' ) html_parts.append('

') # Soft Skills if skills_data.get("soft_skills"): html_parts.append('

') html_parts.append('

🤝 SOFT SKILLS

') html_parts.append('

') for skill in skills_data["soft_skills"]: html_parts.append( f'{skill}' ) html_parts.append('

') # Experience & Roles if skills_data.get("experience_years") or skills_data.get("recent_roles"): html_parts.append('

') html_parts.append('

💼 EXPERIENCE

') if skills_data.get("experience_years"): html_parts.append(f'

Years: {skills_data["experience_years"]}

') if skills_data.get("recent_roles"): html_parts.append('

Recent Roles:

') html_parts.append('

{role}

') html_parts.append('

') # Education if skills_data.get("education") or skills_data.get("certifications"): html_parts.append('

') html_parts.append('

🎓 EDUCATION & CERTIFICATIONS

') if skills_data.get("education"): html_parts.append('

Education:

') html_parts.append('

{edu}

') if skills_data.get("certifications"): html_parts.append('

Certifications:

') html_parts.append('

{cert}

') html_parts.append('

') # Domain Expertise if skills_data.get("domain_expertise"): html_parts.append('

') html_parts.append('

🎯 DOMAIN EXPERTISE

') html_parts.append('

') for domain in skills_data["domain_expertise"]: html_parts.append( f'{domain}' ) html_parts.append('

') html_parts.append('

') return ''.join(html_parts)