@@ -1623,85 +1623,177 @@ class DocumentService:
|
||||
|
||||
Raises:
|
||||
DocumentIndexingError: If document is being indexed or not in correct state
|
||||
ValueError: If action is invalid
|
||||
"""
|
||||
if not document_ids:
|
||||
return
|
||||
|
||||
# Early validation of action parameter
|
||||
valid_actions = ["enable", "disable", "archive", "un_archive"]
|
||||
if action not in valid_actions:
|
||||
raise ValueError(f"Invalid action: {action}. Must be one of {valid_actions}")
|
||||
|
||||
documents_to_update = []
|
||||
|
||||
# First pass: validate all documents and prepare updates
|
||||
for document_id in document_ids:
|
||||
document = DocumentService.get_document(dataset.id, document_id)
|
||||
|
||||
if not document:
|
||||
continue
|
||||
|
||||
# Check if document is being indexed
|
||||
indexing_cache_key = f"document_{document.id}_indexing"
|
||||
cache_result = redis_client.get(indexing_cache_key)
|
||||
if cache_result is not None:
|
||||
raise DocumentIndexingError(f"Document:{document.name} is being indexed, please try again later")
|
||||
|
||||
if action == "enable":
|
||||
if document.enabled:
|
||||
continue
|
||||
document.enabled = True
|
||||
document.disabled_at = None
|
||||
document.disabled_by = None
|
||||
document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
|
||||
# Prepare update based on action
|
||||
update_info = DocumentService._prepare_document_status_update(document, action, user)
|
||||
if update_info:
|
||||
documents_to_update.append(update_info)
|
||||
|
||||
# Second pass: apply all updates in a single transaction
|
||||
if documents_to_update:
|
||||
try:
|
||||
for update_info in documents_to_update:
|
||||
document = update_info["document"]
|
||||
updates = update_info["updates"]
|
||||
|
||||
# Apply updates to the document
|
||||
for field, value in updates.items():
|
||||
setattr(document, field, value)
|
||||
|
||||
db.session.add(document)
|
||||
|
||||
# Batch commit all changes
|
||||
db.session.commit()
|
||||
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
except Exception as e:
|
||||
# Rollback on any error
|
||||
db.session.rollback()
|
||||
raise e
|
||||
# Execute async tasks and set Redis cache after successful commit
|
||||
# propagation_error is used to capture any errors for submitting async task execution
|
||||
propagation_error = None
|
||||
for update_info in documents_to_update:
|
||||
try:
|
||||
# Execute async tasks after successful commit
|
||||
if update_info["async_task"]:
|
||||
task_info = update_info["async_task"]
|
||||
task_func = task_info["function"]
|
||||
task_args = task_info["args"]
|
||||
task_func.delay(*task_args)
|
||||
except Exception as e:
|
||||
# Log the error but do not rollback the transaction
|
||||
logging.exception(f"Error executing async task for document {update_info['document'].id}")
|
||||
# don't raise the error immediately, but capture it for later
|
||||
propagation_error = e
|
||||
try:
|
||||
# Set Redis cache if needed after successful commit
|
||||
if update_info["set_cache"]:
|
||||
document = update_info["document"]
|
||||
indexing_cache_key = f"document_{document.id}_indexing"
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
except Exception as e:
|
||||
# Log the error but do not rollback the transaction
|
||||
logging.exception(f"Error setting cache for document {update_info['document'].id}")
|
||||
# Raise any propagation error after all updates
|
||||
if propagation_error:
|
||||
raise propagation_error
|
||||
|
||||
add_document_to_index_task.delay(document_id)
|
||||
@staticmethod
def _prepare_document_status_update(document, action: str, user):
    """Build the status-update payload for a single document.

    Args:
        document: Document object to update.
        action: One of "enable", "disable", "archive", "un_archive".
        user: Current user, recorded on disable/archive updates.

    Returns:
        dict: Update information, or None when nothing needs to change
        (or the action is unrecognized).
    """
    now = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)

    if action == "enable":
        return DocumentService._prepare_enable_update(document, now)
    if action == "disable":
        return DocumentService._prepare_disable_update(document, user, now)
    if action == "archive":
        return DocumentService._prepare_archive_update(document, user, now)
    if action == "un_archive":
        return DocumentService._prepare_unarchive_update(document, now)

    return None
||||
@staticmethod
def _prepare_enable_update(document, now):
    """Prepare updates for enabling a document.

    Returns None when the document is already enabled (no-op); otherwise a
    payload describing the field updates, the re-index async task, and the
    indexing-cache flag.
    """
    if document.enabled:
        return None

    field_updates = {
        "enabled": True,
        "disabled_at": None,
        "disabled_by": None,
        "updated_at": now,
    }
    return {
        "document": document,
        "updates": field_updates,
        "async_task": {"function": add_document_to_index_task, "args": [document.id]},
        "set_cache": True,
    }
||||
@staticmethod
def _prepare_disable_update(document, user, now):
    """Prepare updates for disabling a document.

    This helper only *describes* the update; the caller is responsible for
    applying field updates, committing, scheduling the async task, and
    setting the indexing cache.

    Args:
        document: Document object to update.
        user: Current user, recorded as ``disabled_by``.
        now: Naive-UTC timestamp to stamp on the update.

    Returns:
        dict: Update information, or None when the document is already
        disabled (no-op).

    Raises:
        DocumentIndexingError: If the document has not completed indexing.
    """
    # Only completed documents can be disabled.
    if not document.completed_at or document.indexing_status != "completed":
        raise DocumentIndexingError(f"Document: {document.name} is not completed.")

    if not document.enabled:
        return None

    return {
        "document": document,
        "updates": {"enabled": False, "disabled_at": now, "disabled_by": user.id, "updated_at": now},
        "async_task": {"function": remove_document_from_index_task, "args": [document.id]},
        "set_cache": True,
    }
||||
@staticmethod
def _prepare_archive_update(document, user, now):
    """Prepare updates for archiving a document.

    This helper only *describes* the update; the caller applies the field
    updates, commits, runs the async task, and sets the indexing cache.

    Args:
        document: Document object to update.
        user: Current user, recorded as ``archived_by``.
        now: Naive-UTC timestamp to stamp on the update.

    Returns:
        dict: Update information, or None when the document is already
        archived (no-op).
    """
    if document.archived:
        return None

    update_info = {
        "document": document,
        "updates": {"archived": True, "archived_at": now, "archived_by": user.id, "updated_at": now},
        "async_task": None,
        "set_cache": False,
    }

    # Only schedule index removal (and the indexing cache) if the document
    # is currently enabled — disabled documents are not in the index.
    if document.enabled:
        update_info["async_task"] = {"function": remove_document_from_index_task, "args": [document.id]}
        update_info["set_cache"] = True

    return update_info
||||
@staticmethod
def _prepare_unarchive_update(document, now):
    """Prepare updates for un-archiving a document.

    This helper only *describes* the update; the caller applies the field
    updates, commits, runs the async task, and sets the indexing cache.

    Args:
        document: Document object to update.
        now: Naive-UTC timestamp to stamp on the update.

    Returns:
        dict: Update information, or None when the document is not archived
        (no-op).
    """
    if not document.archived:
        return None

    update_info = {
        "document": document,
        "updates": {"archived": False, "archived_at": None, "archived_by": None, "updated_at": now},
        "async_task": None,
        "set_cache": False,
    }

    # Only schedule re-indexing (and the indexing cache) if the document
    # is currently enabled.
    if document.enabled:
        update_info["async_task"] = {"function": add_document_to_index_task, "args": [document.id]}
        update_info["set_cache"] = True

    return update_info
||||
class SegmentService:
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from flask import Flask
|
||||
@@ -11,6 +12,24 @@ PROJECT_DIR = os.path.abspath(os.path.join(ABS_PATH, os.pardir, os.pardir))
|
||||
|
||||
CACHED_APP = Flask(__name__)

# Global mock for the Redis client so tests never need a real Redis server.
# Only get / exists / hgetall / incr carry explicit default return values;
# the remaining methods keep MagicMock's default behavior.
_REDIS_METHOD_SPECS = {
    "get": {"return_value": None},
    "setex": {},
    "setnx": {},
    "delete": {},
    "lock": {},
    "exists": {"return_value": False},
    "set": {},
    "expire": {},
    "hgetall": {"return_value": {}},
    "hdel": {},
    "incr": {"return_value": 1},
}

redis_mock = MagicMock()
for _method, _spec in _REDIS_METHOD_SPECS.items():
    setattr(redis_mock, _method, MagicMock(**_spec))

# Apply the mock to the Redis client used by the Flask app extensions.
redis_patcher = patch("extensions.ext_redis.redis_client", redis_mock)
redis_patcher.start()
||||
@pytest.fixture
|
||||
def app() -> Flask:
|
||||
@@ -21,3 +40,19 @@ def app() -> Flask:
|
||||
def _provide_app_context(app: Flask):
    # Run the test inside a Flask application context so code that relies
    # on `current_app` (or app-bound extensions) works during the test.
    with app.app_context():
        yield
||||
|
||||
@pytest.fixture(autouse=True)
def reset_redis_mock():
    """Reset the shared Redis mock to a clean state before each test."""
    redis_mock.reset_mock()

    # Restore the default return values that individual tests may have
    # overridden.
    _defaults = {
        "get": None,
        "setex": None,
        "setnx": None,
        "delete": None,
        "exists": False,
        "set": None,
        "expire": None,
        "hgetall": {},
        "hdel": None,
        "incr": 1,
    }
    for _name, _value in _defaults.items():
        getattr(redis_mock, _name).return_value = _value
1238
api/tests/unit_tests/services/test_dataset_service.py
Normal file
1238
api/tests/unit_tests/services/test_dataset_service.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user