fix: ensure vector database cleanup on dataset deletion regardless of document presence (affects all 33 vector databases) (#23574)

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
yunqiqiliang
2025-08-08 09:18:43 +08:00
committed by GitHub
parent 4b0480c8b3
commit 62772e8871
5 changed files with 97 additions and 105 deletions

View File

@@ -56,15 +56,17 @@ def clean_dataset_task(
documents = db.session.query(Document).where(Document.dataset_id == dataset_id).all()
segments = db.session.query(DocumentSegment).where(DocumentSegment.dataset_id == dataset_id).all()
# Fix: Always clean vector database resources regardless of document existence
# This ensures all 33 vector databases properly drop tables/collections/indices
if doc_form is None:
raise ValueError("Index type must be specified.")
index_processor = IndexProcessorFactory(doc_form).init_index_processor()
index_processor.clean(dataset, None, with_keywords=True, delete_child_chunks=True)
if documents is None or len(documents) == 0:
logging.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green"))
else:
logging.info(click.style(f"Cleaning documents for dataset: {dataset_id}", fg="green"))
# Specify the index type before initializing the index processor
if doc_form is None:
raise ValueError("Index type must be specified.")
index_processor = IndexProcessorFactory(doc_form).init_index_processor()
index_processor.clean(dataset, None, with_keywords=True, delete_child_chunks=True)
for document in documents:
db.session.delete(document)