feat: add support for modifying document metadata via the dataset API (#13116)

This commit is contained in:
aplio
2025-02-02 16:27:12 +09:00
committed by GitHub
parent c2664e0283
commit d73d191f99
5 changed files with 522 additions and 0 deletions

View File

@@ -42,6 +42,7 @@ from models.source import DataSourceOauthBinding
from services.entities.knowledge_entities.knowledge_entities import (
ChildChunkUpdateArgs,
KnowledgeConfig,
MetaDataConfig,
RerankingModel,
RetrievalModel,
SegmentUpdateArgs,
@@ -894,6 +895,9 @@ class DocumentService:
document.data_source_info = json.dumps(data_source_info)
document.batch = batch
document.indexing_status = "waiting"
# Carry caller-supplied metadata over to the re-indexed (duplicate) document.
if knowledge_config.metadata:
    document.doc_type = knowledge_config.metadata.doc_type
    # Write to doc_metadata, the attribute the other metadata assignments in
    # this service use; the original wrote to `document.metadata`, which is
    # presumably not the mapped column -- confirm against the Document model.
    document.doc_metadata = knowledge_config.metadata.doc_metadata
db.session.add(document)
documents.append(document)
duplicate_document_ids.append(document.id)
@@ -910,6 +914,7 @@ class DocumentService:
account,
file_name,
batch,
knowledge_config.metadata,
)
db.session.add(document)
db.session.flush()
@@ -965,6 +970,7 @@ class DocumentService:
account,
page.page_name,
batch,
knowledge_config.metadata,
)
db.session.add(document)
db.session.flush()
@@ -1005,6 +1011,7 @@ class DocumentService:
account,
document_name,
batch,
knowledge_config.metadata,
)
db.session.add(document)
db.session.flush()
@@ -1042,6 +1049,7 @@ class DocumentService:
account: Account,
name: str,
batch: str,
metadata: Optional[MetaDataConfig] = None,
):
document = Document(
tenant_id=dataset.tenant_id,
@@ -1057,6 +1065,9 @@ class DocumentService:
doc_form=document_form,
doc_language=document_language,
)
if metadata is not None:
document.doc_metadata = metadata.doc_metadata
document.doc_type = metadata.doc_type
return document
@staticmethod
@@ -1169,6 +1180,10 @@ class DocumentService:
# update document name
if document_data.name:
document.name = document_data.name
# Update doc_type and doc_metadata only when the caller supplied metadata.
if document_data.metadata is not None:
    # doc_metadata must come from metadata.doc_metadata; the original assigned
    # metadata.doc_type here, replacing the stored metadata with the type string.
    document.doc_metadata = document_data.metadata.doc_metadata
    document.doc_type = document_data.metadata.doc_type
# update document to be waiting
document.indexing_status = "waiting"
document.completed_at = None