from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import os
from dotenv import load_dotenv

load_dotenv()
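
# Expected .env entries (GROQ_MODEL is optional; the code below falls back to
# 'llama-3.1-8b-instant'):
#   GROQ_API_KEY=<your-groq-api-key>
#   GROQ_MODEL=llama-3.1-8b-instant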


def load_resume(file_path):
    """Load a resume from a PDF, DOCX, or TXT file."""
    suffix = file_path.lower()
    if suffix.endswith('.pdf'):
        loader = PyPDFLoader(file_path)
    elif suffix.endswith('.docx'):
        # UnstructuredWordDocumentLoader requires the `unstructured` package.
        loader = UnstructuredWordDocumentLoader(file_path)
    elif suffix.endswith('.txt'):
        loader = TextLoader(file_path)
    else:
        raise ValueError(
            f"Unsupported file type for {file_path!r}; "
            "supported formats: PDF, DOCX, TXT"
        )

    return loader.load()


def create_resume_qa_system(resume_file_path):
    """Create complete resume Q&A system"""

    # 1. Load and split resume
    docs = load_resume(resume_file_path)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100
    )
    splits = text_splitter.split_documents(docs)

    # 2. Create embeddings and vector store
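    # Note: newer LangChain releases ship HuggingFaceEmbeddings in the
    # `langchain_huggingface` package; the community import used here still works.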
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = FAISS.from_documents(splits, embeddings)

    # 3. Set up the LLM (GROQ_API_KEY and GROQ_MODEL come from the environment)
    llm = ChatGroq(
        api_key=os.getenv('GROQ_API_KEY'),
        model=os.getenv('GROQ_MODEL', 'llama-3.1-8b-instant'),
        temperature=0.1
    )

    # 4. Retrieval chain
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    template = """Use the following resume context to answer the question. 
If you don't know the answer, say so. Answer concisely and accurately.

Context: {context}

Question: {question}

Answer:"""

    prompt = ChatPromptTemplate.from_template(template)

    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    return chain
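

# Optional sketch: persist the FAISS index so the resume is not re-embedded on
# every run. INDEX_DIR is a hypothetical local path; save_local/load_local are
# the standard persistence methods on the langchain_community FAISS store.
# Swap FAISS.from_documents in create_resume_qa_system for this helper if
# persistence is wanted.
INDEX_DIR = "faiss_resume_index"


def build_or_load_vectorstore(splits, embeddings):
    """Load a saved FAISS index if one exists; otherwise build and save it."""
    if os.path.isdir(INDEX_DIR):
        # Recent langchain_community versions require this flag because the
        # stored docstore is pickled on disk.
        return FAISS.load_local(
            INDEX_DIR, embeddings, allow_dangerous_deserialization=True
        )
    vectorstore = FAISS.from_documents(splits, embeddings)
    vectorstore.save_local(INDEX_DIR)
    return vectorstore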


# Usage
if __name__ == "__main__":
    # Replace with your resume path
    qa_chain = create_resume_qa_system("path/to/your/resume.pdf")

    # Ask questions
    questions = [
        "What is my experience with Microsoft Fabric?",
        "List my technical skills",
        "What certifications do I have?",
        "Describe my Databricks projects"
    ]

    for question in questions:
        answer = qa_chain.invoke(question)
        print(f"Q: {question}\nA: {answer}\n{'-' * 50}")