Refactor: use DatasourceType.XX.value instead of hardcoded (#25015)
Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -526,7 +526,6 @@ class PublishedWorkflowApi(Resource):
|
|||||||
)
|
)
|
||||||
|
|
||||||
app_model.workflow_id = workflow.id
|
app_model.workflow_id = workflow.id
|
||||||
db.session.commit()
|
|
||||||
|
|
||||||
workflow_created_at = TimestampField().format(workflow.created_at)
|
workflow_created_at = TimestampField().format(workflow.created_at)
|
||||||
|
|
||||||
|
@@ -10,6 +10,7 @@ from werkzeug.exceptions import NotFound
|
|||||||
from controllers.console import api
|
from controllers.console import api
|
||||||
from controllers.console.wraps import account_initialization_required, setup_required
|
from controllers.console.wraps import account_initialization_required, setup_required
|
||||||
from core.indexing_runner import IndexingRunner
|
from core.indexing_runner import IndexingRunner
|
||||||
|
from core.rag.extractor.entity.datasource_type import DatasourceType
|
||||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
from core.rag.extractor.notion_extractor import NotionExtractor
|
from core.rag.extractor.notion_extractor import NotionExtractor
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
@@ -214,7 +215,7 @@ class DataSourceNotionApi(Resource):
|
|||||||
workspace_id = notion_info["workspace_id"]
|
workspace_id = notion_info["workspace_id"]
|
||||||
for page in notion_info["pages"]:
|
for page in notion_info["pages"]:
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="notion_import",
|
datasource_type=DatasourceType.NOTION.value,
|
||||||
notion_info={
|
notion_info={
|
||||||
"notion_workspace_id": workspace_id,
|
"notion_workspace_id": workspace_id,
|
||||||
"notion_obj_id": page["page_id"],
|
"notion_obj_id": page["page_id"],
|
||||||
|
@@ -22,6 +22,7 @@ from core.model_runtime.entities.model_entities import ModelType
|
|||||||
from core.plugin.entities.plugin import ModelProviderID
|
from core.plugin.entities.plugin import ModelProviderID
|
||||||
from core.provider_manager import ProviderManager
|
from core.provider_manager import ProviderManager
|
||||||
from core.rag.datasource.vdb.vector_type import VectorType
|
from core.rag.datasource.vdb.vector_type import VectorType
|
||||||
|
from core.rag.extractor.entity.datasource_type import DatasourceType
|
||||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
@@ -422,7 +423,9 @@ class DatasetIndexingEstimateApi(Resource):
|
|||||||
if file_details:
|
if file_details:
|
||||||
for file_detail in file_details:
|
for file_detail in file_details:
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="upload_file", upload_file=file_detail, document_model=args["doc_form"]
|
datasource_type=DatasourceType.FILE.value,
|
||||||
|
upload_file=file_detail,
|
||||||
|
document_model=args["doc_form"],
|
||||||
)
|
)
|
||||||
extract_settings.append(extract_setting)
|
extract_settings.append(extract_setting)
|
||||||
elif args["info_list"]["data_source_type"] == "notion_import":
|
elif args["info_list"]["data_source_type"] == "notion_import":
|
||||||
@@ -431,7 +434,7 @@ class DatasetIndexingEstimateApi(Resource):
|
|||||||
workspace_id = notion_info["workspace_id"]
|
workspace_id = notion_info["workspace_id"]
|
||||||
for page in notion_info["pages"]:
|
for page in notion_info["pages"]:
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="notion_import",
|
datasource_type=DatasourceType.NOTION.value,
|
||||||
notion_info={
|
notion_info={
|
||||||
"notion_workspace_id": workspace_id,
|
"notion_workspace_id": workspace_id,
|
||||||
"notion_obj_id": page["page_id"],
|
"notion_obj_id": page["page_id"],
|
||||||
@@ -445,7 +448,7 @@ class DatasetIndexingEstimateApi(Resource):
|
|||||||
website_info_list = args["info_list"]["website_info_list"]
|
website_info_list = args["info_list"]["website_info_list"]
|
||||||
for url in website_info_list["urls"]:
|
for url in website_info_list["urls"]:
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="website_crawl",
|
datasource_type=DatasourceType.WEBSITE.value,
|
||||||
website_info={
|
website_info={
|
||||||
"provider": website_info_list["provider"],
|
"provider": website_info_list["provider"],
|
||||||
"job_id": website_info_list["job_id"],
|
"job_id": website_info_list["job_id"],
|
||||||
|
@@ -40,6 +40,7 @@ from core.model_manager import ModelManager
|
|||||||
from core.model_runtime.entities.model_entities import ModelType
|
from core.model_runtime.entities.model_entities import ModelType
|
||||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
||||||
from core.plugin.impl.exc import PluginDaemonClientSideError
|
from core.plugin.impl.exc import PluginDaemonClientSideError
|
||||||
|
from core.rag.extractor.entity.datasource_type import DatasourceType
|
||||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from fields.document_fields import (
|
from fields.document_fields import (
|
||||||
@@ -425,7 +426,7 @@ class DocumentIndexingEstimateApi(DocumentResource):
|
|||||||
raise NotFound("File not found.")
|
raise NotFound("File not found.")
|
||||||
|
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="upload_file", upload_file=file, document_model=document.doc_form
|
datasource_type=DatasourceType.FILE.value, upload_file=file, document_model=document.doc_form
|
||||||
)
|
)
|
||||||
|
|
||||||
indexing_runner = IndexingRunner()
|
indexing_runner = IndexingRunner()
|
||||||
@@ -485,13 +486,13 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
|
|||||||
raise NotFound("File not found.")
|
raise NotFound("File not found.")
|
||||||
|
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="upload_file", upload_file=file_detail, document_model=document.doc_form
|
datasource_type=DatasourceType.FILE.value, upload_file=file_detail, document_model=document.doc_form
|
||||||
)
|
)
|
||||||
extract_settings.append(extract_setting)
|
extract_settings.append(extract_setting)
|
||||||
|
|
||||||
elif document.data_source_type == "notion_import":
|
elif document.data_source_type == "notion_import":
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="notion_import",
|
datasource_type=DatasourceType.NOTION.value,
|
||||||
notion_info={
|
notion_info={
|
||||||
"notion_workspace_id": data_source_info["notion_workspace_id"],
|
"notion_workspace_id": data_source_info["notion_workspace_id"],
|
||||||
"notion_obj_id": data_source_info["notion_page_id"],
|
"notion_obj_id": data_source_info["notion_page_id"],
|
||||||
@@ -503,7 +504,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
|
|||||||
extract_settings.append(extract_setting)
|
extract_settings.append(extract_setting)
|
||||||
elif document.data_source_type == "website_crawl":
|
elif document.data_source_type == "website_crawl":
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="website_crawl",
|
datasource_type=DatasourceType.WEBSITE.value,
|
||||||
website_info={
|
website_info={
|
||||||
"provider": data_source_info["provider"],
|
"provider": data_source_info["provider"],
|
||||||
"job_id": data_source_info["job_id"],
|
"job_id": data_source_info["job_id"],
|
||||||
|
@@ -19,6 +19,7 @@ from core.model_runtime.entities.model_entities import ModelType
|
|||||||
from core.rag.cleaner.clean_processor import CleanProcessor
|
from core.rag.cleaner.clean_processor import CleanProcessor
|
||||||
from core.rag.datasource.keyword.keyword_factory import Keyword
|
from core.rag.datasource.keyword.keyword_factory import Keyword
|
||||||
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
from core.rag.docstore.dataset_docstore import DatasetDocumentStore
|
||||||
|
from core.rag.extractor.entity.datasource_type import DatasourceType
|
||||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||||
from core.rag.index_processor.constant.index_type import IndexType
|
from core.rag.index_processor.constant.index_type import IndexType
|
||||||
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
|
from core.rag.index_processor.index_processor_base import BaseIndexProcessor
|
||||||
@@ -340,7 +341,9 @@ class IndexingRunner:
|
|||||||
|
|
||||||
if file_detail:
|
if file_detail:
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="upload_file", upload_file=file_detail, document_model=dataset_document.doc_form
|
datasource_type=DatasourceType.FILE.value,
|
||||||
|
upload_file=file_detail,
|
||||||
|
document_model=dataset_document.doc_form,
|
||||||
)
|
)
|
||||||
text_docs = index_processor.extract(extract_setting, process_rule_mode=process_rule["mode"])
|
text_docs = index_processor.extract(extract_setting, process_rule_mode=process_rule["mode"])
|
||||||
elif dataset_document.data_source_type == "notion_import":
|
elif dataset_document.data_source_type == "notion_import":
|
||||||
@@ -351,7 +354,7 @@ class IndexingRunner:
|
|||||||
):
|
):
|
||||||
raise ValueError("no notion import info found")
|
raise ValueError("no notion import info found")
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="notion_import",
|
datasource_type=DatasourceType.NOTION.value,
|
||||||
notion_info={
|
notion_info={
|
||||||
"notion_workspace_id": data_source_info["notion_workspace_id"],
|
"notion_workspace_id": data_source_info["notion_workspace_id"],
|
||||||
"notion_obj_id": data_source_info["notion_page_id"],
|
"notion_obj_id": data_source_info["notion_page_id"],
|
||||||
@@ -371,7 +374,7 @@ class IndexingRunner:
|
|||||||
):
|
):
|
||||||
raise ValueError("no website import info found")
|
raise ValueError("no website import info found")
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="website_crawl",
|
datasource_type=DatasourceType.WEBSITE.value,
|
||||||
website_info={
|
website_info={
|
||||||
"provider": data_source_info["provider"],
|
"provider": data_source_info["provider"],
|
||||||
"job_id": data_source_info["job_id"],
|
"job_id": data_source_info["job_id"],
|
||||||
|
@@ -45,7 +45,7 @@ class ExtractProcessor:
|
|||||||
cls, upload_file: UploadFile, return_text: bool = False, is_automatic: bool = False
|
cls, upload_file: UploadFile, return_text: bool = False, is_automatic: bool = False
|
||||||
) -> Union[list[Document], str]:
|
) -> Union[list[Document], str]:
|
||||||
extract_setting = ExtractSetting(
|
extract_setting = ExtractSetting(
|
||||||
datasource_type="upload_file", upload_file=upload_file, document_model="text_model"
|
datasource_type=DatasourceType.FILE.value, upload_file=upload_file, document_model="text_model"
|
||||||
)
|
)
|
||||||
if return_text:
|
if return_text:
|
||||||
delimiter = "\n"
|
delimiter = "\n"
|
||||||
@@ -76,7 +76,7 @@ class ExtractProcessor:
|
|||||||
# https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521
|
# https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521
|
||||||
file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}"
|
file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}"
|
||||||
Path(file_path).write_bytes(response.content)
|
Path(file_path).write_bytes(response.content)
|
||||||
extract_setting = ExtractSetting(datasource_type="upload_file", document_model="text_model")
|
extract_setting = ExtractSetting(datasource_type=DatasourceType.FILE.value, document_model="text_model")
|
||||||
if return_text:
|
if return_text:
|
||||||
delimiter = "\n"
|
delimiter = "\n"
|
||||||
return delimiter.join(
|
return delimiter.join(
|
||||||
|
Reference in New Issue
Block a user