remove unstructured pdf extract (#9794)
This commit is contained in:
@@ -21,7 +21,6 @@ from core.rag.extractor.unstructured.unstructured_eml_extractor import Unstructu
|
|||||||
from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor
|
from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor
|
||||||
from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor
|
from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor
|
||||||
from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor
|
from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor
|
||||||
from core.rag.extractor.unstructured.unstructured_pdf_extractor import UnstructuredPDFExtractor
|
|
||||||
from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor
|
from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor
|
||||||
from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor
|
from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor
|
||||||
from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor
|
from core.rag.extractor.unstructured.unstructured_text_extractor import UnstructuredTextExtractor
|
||||||
@@ -103,7 +102,7 @@ class ExtractProcessor:
|
|||||||
if file_extension in {".xlsx", ".xls"}:
|
if file_extension in {".xlsx", ".xls"}:
|
||||||
extractor = ExcelExtractor(file_path)
|
extractor = ExcelExtractor(file_path)
|
||||||
elif file_extension == ".pdf":
|
elif file_extension == ".pdf":
|
||||||
extractor = UnstructuredPDFExtractor(file_path, unstructured_api_url, unstructured_api_key)
|
extractor = PdfExtractor(file_path)
|
||||||
elif file_extension in {".md", ".markdown"}:
|
elif file_extension in {".md", ".markdown"}:
|
||||||
extractor = (
|
extractor = (
|
||||||
UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)
|
UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)
|
||||||
|
Reference in New Issue
Block a user