"""Resume Q&A system: loads a resume (PDF/DOCX/TXT), indexes it with FAISS,
and answers questions via a Groq-hosted LLM using LangChain."""
| from langchain_groq import ChatGroq | |
| from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, TextLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.embeddings import HuggingFaceEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.runnables import RunnablePassthrough | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
def load_resume(file_path):
    """Load a resume document from disk.

    Args:
        file_path: Path to a ``.pdf``, ``.docx``, or ``.txt`` file. The
            extension check is case-insensitive, so ``RESUME.PDF`` also works.

    Returns:
        The list of Document objects produced by the matching loader.

    Raises:
        ValueError: If the file extension is not one of the supported formats.
    """
    # Lowercase once so uppercase/mixed-case extensions are accepted too
    # (the original comparison was case-sensitive and rejected e.g. ".PDF").
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        loader = PyPDFLoader(file_path)
    elif lowered.endswith('.docx'):
        loader = UnstructuredWordDocumentLoader(file_path)
    elif lowered.endswith('.txt'):
        loader = TextLoader(file_path)
    else:
        raise ValueError("Supported formats: PDF, DOCX, TXT")
    return loader.load()
def create_resume_qa_system(resume_file_path):
    """Build a retrieval-augmented Q&A chain over a single resume.

    The resume is loaded, chunked, embedded into a FAISS index, and wired
    into an LCEL pipeline backed by a Groq chat model. The returned chain
    takes a question string and produces an answer string.
    """
    # Load the resume and break it into overlapping chunks for retrieval.
    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    chunks = splitter.split_documents(load_resume(resume_file_path))

    # Index the chunks with a small sentence-transformer embedding model,
    # then expose a top-4 similarity retriever over the index.
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    index = FAISS.from_documents(chunks, embedder)
    retriever = index.as_retriever(search_kwargs={"k": 4})

    # Low-temperature Groq chat model for factual, grounded answers;
    # model name is overridable via the GROQ_MODEL environment variable.
    llm = ChatGroq(
        api_key=os.getenv('GROQ_API_KEY'),
        model=os.getenv('GROQ_MODEL', 'llama-3.1-8b-instant'),
        temperature=0.1,
    )

    prompt = ChatPromptTemplate.from_template(
        """Use the following resume context to answer the question.
If you don't know the answer, say so. Answer concisely and accurately.
Context: {context}
Question: {question}
Answer:"""
    )

    def format_docs(docs):
        # Concatenate retrieved chunks into one context string for the prompt.
        return "\n\n".join(doc.page_content for doc in docs)

    # context comes from retrieval; the raw question passes straight through.
    return (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
# Usage
if __name__ == "__main__":
    import sys

    # The resume path may be supplied as the first CLI argument; otherwise
    # fall back to the placeholder (replace with your actual resume path).
    resume_path = sys.argv[1] if len(sys.argv) > 1 else "path/to/your/resume.pdf"
    qa_chain = create_resume_qa_system(resume_path)

    # Sample questions exercising different sections of the resume.
    questions = [
        "What is my experience with Microsoft Fabric?",
        "List my technical skills",
        "What certifications do I have?",
        "Describe my Databricks projects",
    ]
    for question in questions:
        answer = qa_chain.invoke(question)
        print(f"Q: {question}\nA: {answer}\n{'-' * 50}")