Spaces:
Runtime error
Runtime error
Mostafa committed on
Upload folder using huggingface_hub
Browse files
- src/document_processor.py +0 -1
- src/vector_store.py +2 -22
src/document_processor.py
CHANGED
|
@@ -88,7 +88,6 @@ class ChunkMetadata:
|
|
| 88 |
|
| 89 |
@dataclass
|
| 90 |
class DocumentChunk:
|
| 91 |
-
"""Represents a chunk of processed document content."""
|
| 92 |
content: str
|
| 93 |
metadata: ChunkMetadata
|
| 94 |
embedding: Optional[List[float]] = None
|
|
|
|
| 88 |
|
| 89 |
@dataclass
|
| 90 |
class DocumentChunk:
|
|
|
|
| 91 |
content: str
|
| 92 |
metadata: ChunkMetadata
|
| 93 |
embedding: Optional[List[float]] = None
|
src/vector_store.py
CHANGED
|
@@ -13,21 +13,9 @@ load_dotenv()
|
|
| 13 |
|
| 14 |
|
| 15 |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
|
|
| 16 |
try:
|
| 17 |
from logger.custom_logger import CustomLoggerTracker
|
| 18 |
-
from src.config import Config
|
| 19 |
-
|
| 20 |
-
from src.ingestion_pipeline import DocumentIngestionPipeline
|
| 21 |
-
from src.rag_engine import RAGEngine
|
| 22 |
-
from src.metadata_manager import MetadataManager
|
| 23 |
-
from src.document_processor import ( ChunkMetadata,
|
| 24 |
-
DocumentProcessorFactory,
|
| 25 |
-
DocumentType,
|
| 26 |
-
DocumentChunk,
|
| 27 |
-
DocumentProcessor)
|
| 28 |
-
from src.pdf_processor import PDFProcessor
|
| 29 |
-
from src.excel_processor import ExcelProcessor
|
| 30 |
-
from src.image_processor import ImageProcessor
|
| 31 |
custom_log = CustomLoggerTracker()
|
| 32 |
logger = custom_log.get_logger("vector_store")
|
| 33 |
|
|
@@ -446,12 +434,4 @@ if __name__=="__main__":
|
|
| 446 |
if health:
|
| 447 |
logger.info("Vector store is healthy and ready.")
|
| 448 |
else:
|
| 449 |
-
logger.error("Vector store is not accessible.")
|
| 450 |
-
|
| 451 |
-
# Register processors
|
| 452 |
-
DocumentProcessorFactory.register_processor(DocumentType.PDF, PDFProcessor)
|
| 453 |
-
DocumentProcessorFactory.register_processor(DocumentType.EXCEL, ExcelProcessor)
|
| 454 |
-
DocumentProcessorFactory.register_processor(DocumentType.IMAGE, ImageProcessor)
|
| 455 |
-
|
| 456 |
-
file_path = "/Users/ahmedmostafa/Downloads/eval_Korean_qa/data/documents/4.외주입고현황.xlsx"
|
| 457 |
-
document_processor = DocumentProcessor(file_path)
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 16 |
+
from src.document_processor import DocumentChunk, ChunkMetadata
|
| 17 |
try:
|
| 18 |
from logger.custom_logger import CustomLoggerTracker
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
custom_log = CustomLoggerTracker()
|
| 20 |
logger = custom_log.get_logger("vector_store")
|
| 21 |
|
|
|
|
| 434 |
if health:
|
| 435 |
logger.info("Vector store is healthy and ready.")
|
| 436 |
else:
|
| 437 |
+
logger.error("Vector store is not accessible.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|