Spaces:
Running
Running
batuhanozkose
feat: Implement initial PaperCast application with core modules, documentation, a periodic curl script, and a Gradio certificate.
472739a
| import fitz # PyMuPDF | |
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF file using PyMuPDF.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        str: The text of all pages concatenated in page order, or an
        empty string if the file could not be opened or read.
    """
    try:
        # Open via the context manager so the document is closed even when
        # extraction fails partway through; previously it was never closed.
        with fitz.open(pdf_path) as doc:
            # Join once instead of growing a string page by page.
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Deliberate best-effort: report the failure and fall back to an
        # empty string rather than propagating to the caller.
        print(f"Error reading PDF {pdf_path}: {e}")
        return ""