# Portfolio / main.py — retrieval-augmented Q&A over a local resume file.
# Author: Srivatsa (initial commit 4025ad0)
# LLM client (Groq-hosted chat models).
from langchain_groq import ChatGroq
# Document loaders for the three supported resume formats.
from langchain_community.document_loaders import PyPDFLoader, UnstructuredWordDocumentLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Local sentence-transformer embeddings + in-memory FAISS vector index.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# LCEL building blocks for the retrieval chain.
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import os
from dotenv import load_dotenv

# Pull GROQ_API_KEY / GROQ_MODEL from a local .env file into the environment.
load_dotenv()
def load_resume(file_path):
    """Load a resume document from disk.

    Args:
        file_path: Path to a resume in PDF, DOCX, or TXT format. The
            extension check is case-insensitive (``.PDF`` works too).

    Returns:
        list: LangChain ``Document`` objects produced by the loader.

    Raises:
        ValueError: If the file extension is not a supported format.
    """
    # Normalise once so upper/mixed-case extensions are accepted.
    suffix = file_path.lower()
    if suffix.endswith('.pdf'):
        loader = PyPDFLoader(file_path)
    elif suffix.endswith('.docx'):
        loader = UnstructuredWordDocumentLoader(file_path)
    elif suffix.endswith('.txt'):
        loader = TextLoader(file_path)
    else:
        raise ValueError("Supported formats: PDF, DOCX, TXT")
    return loader.load()
def create_resume_qa_system(resume_file_path):
    """Build a retrieval-augmented Q&A chain over a single resume.

    Args:
        resume_file_path: Path to the resume (PDF, DOCX, or TXT).

    Returns:
        A LangChain runnable; ``chain.invoke(question)`` returns the
        answer as a plain string.

    Raises:
        ValueError: If the resume format is unsupported or the
            ``GROQ_API_KEY`` environment variable is not set.
    """
    # Fail fast with a clear message instead of an opaque auth error
    # surfacing later inside the Groq client.
    api_key = os.getenv('GROQ_API_KEY')
    if not api_key:
        raise ValueError("GROQ_API_KEY environment variable is not set")

    # 1. Load the resume and split it into overlapping chunks so the
    #    retriever can return focused passages.
    docs = load_resume(resume_file_path)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100
    )
    splits = text_splitter.split_documents(docs)

    # 2. Embed chunks locally and index them in an in-memory FAISS store.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = FAISS.from_documents(splits, embeddings)

    # 3. LLM: low temperature keeps answers factual and repeatable.
    llm = ChatGroq(
        api_key=api_key,
        model=os.getenv('GROQ_MODEL', 'llama-3.1-8b-instant'),
        temperature=0.1
    )

    # 4. Retrieval chain: stuff the top-4 chunks into the prompt context.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
    template = """Use the following resume context to answer the question.
If you don't know the answer, say so. Answer concisely and accurately.
Context: {context}
Question: {question}
Answer:"""
    prompt = ChatPromptTemplate.from_template(template)

    def format_docs(docs):
        # Join retrieved chunks into one context string for the prompt.
        return "\n\n".join(doc.page_content for doc in docs)

    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return chain
# Usage
if __name__ == "__main__":
    resume_path = "path/to/your/resume.pdf"  # replace with your resume path

    # Fail early with a clear message if the placeholder path was left in;
    # otherwise the error surfaces deep inside the PDF loader.
    if not os.path.exists(resume_path):
        raise SystemExit(f"Resume file not found: {resume_path}")

    qa_chain = create_resume_qa_system(resume_path)

    # Sample questions exercising different sections of a resume.
    questions = [
        "What is my experience with Microsoft Fabric?",
        "List my technical skills",
        "What certifications do I have?",
        "Describe my Databricks projects"
    ]
    for question in questions:
        answer = qa_chain.invoke(question)
        print(f"Q: {question}\nA: {answer}\n{'-' * 50}")