fix: Enhance doc_form null checking, exception handling, and rollback logic (#23713)
This commit is contained in:
@@ -56,19 +56,29 @@ def clean_dataset_task(
|
|||||||
documents = db.session.query(Document).where(Document.dataset_id == dataset_id).all()
|
documents = db.session.query(Document).where(Document.dataset_id == dataset_id).all()
|
||||||
segments = db.session.query(DocumentSegment).where(DocumentSegment.dataset_id == dataset_id).all()
|
segments = db.session.query(DocumentSegment).where(DocumentSegment.dataset_id == dataset_id).all()
|
||||||
|
|
||||||
# Fix: Always clean vector database resources regardless of document existence
|
# Enhanced validation: Check if doc_form is None, empty string, or contains only whitespace
|
||||||
# This ensures all 33 vector databases properly drop tables/collections/indices
|
# This ensures all invalid doc_form values are properly handled
|
||||||
if doc_form is None:
|
if doc_form is None or (isinstance(doc_form, str) and not doc_form.strip()):
|
||||||
# Use default paragraph index type for empty datasets to enable vector database cleanup
|
# Use default paragraph index type for empty/invalid datasets to enable vector database cleanup
|
||||||
from core.rag.index_processor.constant.index_type import IndexType
|
from core.rag.index_processor.constant.index_type import IndexType
|
||||||
|
|
||||||
doc_form = IndexType.PARAGRAPH_INDEX
|
doc_form = IndexType.PARAGRAPH_INDEX
|
||||||
logging.info(
|
logging.info(
|
||||||
click.style(f"No documents found, using default index type for cleanup: {doc_form}", fg="yellow")
|
click.style(f"Invalid doc_form detected, using default index type for cleanup: {doc_form}", fg="yellow")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add exception handling around index processor initialization and its clean() call to prevent a single point of failure
|
||||||
|
# This ensures Document/Segment deletion can continue even if vector database cleanup fails
|
||||||
|
try:
|
||||||
index_processor = IndexProcessorFactory(doc_form).init_index_processor()
|
index_processor = IndexProcessorFactory(doc_form).init_index_processor()
|
||||||
index_processor.clean(dataset, None, with_keywords=True, delete_child_chunks=True)
|
index_processor.clean(dataset, None, with_keywords=True, delete_child_chunks=True)
|
||||||
|
logging.info(click.style(f"Successfully cleaned vector database for dataset: {dataset_id}", fg="green"))
|
||||||
|
except Exception as index_cleanup_error:
|
||||||
|
logging.exception(click.style(f"Failed to clean vector database for dataset {dataset_id}", fg="red"))
|
||||||
|
# Continue with document and segment deletion even if vector cleanup fails
|
||||||
|
logging.info(
|
||||||
|
click.style(f"Continuing with document and segment deletion for dataset: {dataset_id}", fg="yellow")
|
||||||
|
)
|
||||||
|
|
||||||
if documents is None or len(documents) == 0:
|
if documents is None or len(documents) == 0:
|
||||||
logging.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green"))
|
logging.info(click.style(f"No documents found for dataset: {dataset_id}", fg="green"))
|
||||||
@@ -128,6 +138,14 @@ def clean_dataset_task(
|
|||||||
click.style(f"Cleaned dataset when dataset deleted: {dataset_id} latency: {end_at - start_at}", fg="green")
|
click.style(f"Cleaned dataset when dataset deleted: {dataset_id} latency: {end_at - start_at}", fg="green")
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
# Add rollback to prevent dirty session state in case of exceptions
|
||||||
|
# This ensures the database session is properly cleaned up
|
||||||
|
try:
|
||||||
|
db.session.rollback()
|
||||||
|
logging.info(click.style(f"Rolled back database session for dataset: {dataset_id}", fg="yellow"))
|
||||||
|
except Exception as rollback_error:
|
||||||
|
logging.exception("Failed to rollback database session")
|
||||||
|
|
||||||
logging.exception("Cleaned dataset when dataset deleted failed")
|
logging.exception("Cleaned dataset when dataset deleted failed")
|
||||||
finally:
|
finally:
|
||||||
db.session.close()
|
db.session.close()
|
||||||
|
Reference in New Issue
Block a user