diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py index 603f7555d..6a23f3cfe 100644 --- a/api/core/rag/extractor/extract_processor.py +++ b/api/core/rag/extractor/extract_processor.py @@ -21,7 +21,6 @@ from core.rag.extractor.unstructured.unstructured_eml_extractor import Unstructu from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor -from core.rag.extractor.unstructured.unstructured_pdf_extractor import UnstructuredPDFExtractor from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor @@ -103,7 +102,7 @@ class ExtractProcessor: if file_extension in {".xlsx", ".xls"}: extractor = ExcelExtractor(file_path) elif file_extension == ".pdf": - extractor = UnstructuredPDFExtractor(file_path, unstructured_api_url, unstructured_api_key) + extractor = PdfExtractor(file_path) elif file_extension in {".md", ".markdown"}: extractor = ( UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)