@@ -1623,85 +1623,177 @@ class DocumentService:
 
         Raises:
             DocumentIndexingError: If document is being indexed or not in correct state
+            ValueError: If action is invalid
         """
         if not document_ids:
             return
 
+        # Early validation of action parameter
+        valid_actions = ["enable", "disable", "archive", "un_archive"]
+        if action not in valid_actions:
+            raise ValueError(f"Invalid action: {action}. Must be one of {valid_actions}")
+
+        documents_to_update = []
+
+        # First pass: validate all documents and prepare updates
         for document_id in document_ids:
             document = DocumentService.get_document(dataset.id, document_id)
             if not document:
                 continue
 
+            # Check if document is being indexed
             indexing_cache_key = f"document_{document.id}_indexing"
             cache_result = redis_client.get(indexing_cache_key)
             if cache_result is not None:
                 raise DocumentIndexingError(f"Document:{document.name} is being indexed, please try again later")
 
-            if action == "enable":
-                if document.enabled:
-                    continue
-                document.enabled = True
-                document.disabled_at = None
-                document.disabled_by = None
-                document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
-                db.session.commit()
-
-                # Set cache to prevent indexing the same document multiple times
-                redis_client.setex(indexing_cache_key, 600, 1)
-
-                add_document_to_index_task.delay(document_id)
-
-            elif action == "disable":
-                if not document.completed_at or document.indexing_status != "completed":
-                    raise DocumentIndexingError(f"Document: {document.name} is not completed.")
-                if not document.enabled:
-                    continue
-
-                document.enabled = False
-                document.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
-                document.disabled_by = user.id
-                document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
-                db.session.commit()
-
-                # Set cache to prevent indexing the same document multiple times
-                redis_client.setex(indexing_cache_key, 600, 1)
-
-                remove_document_from_index_task.delay(document_id)
-
-            elif action == "archive":
-                if document.archived:
-                    continue
-
-                document.archived = True
-                document.archived_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
-                document.archived_by = user.id
-                document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
-                db.session.commit()
-
-                if document.enabled:
-                    # Set cache to prevent indexing the same document multiple times
-                    redis_client.setex(indexing_cache_key, 600, 1)
-
-                    remove_document_from_index_task.delay(document_id)
-
-            elif action == "un_archive":
-                if not document.archived:
-                    continue
-                document.archived = False
-                document.archived_at = None
-                document.archived_by = None
-                document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
-                db.session.commit()
-
-                # Only re-index if the document is currently enabled
-                if document.enabled:
-                    # Set cache to prevent indexing the same document multiple times
-                    redis_client.setex(indexing_cache_key, 600, 1)
-
-                    add_document_to_index_task.delay(document_id)
-            else:
-                raise ValueError(f"Invalid action: {action}")
+            # Prepare update based on action
+            update_info = DocumentService._prepare_document_status_update(document, action, user)
+            if update_info:
+                documents_to_update.append(update_info)
+
+        # Second pass: apply all updates in a single transaction
+        if documents_to_update:
+            try:
+                for update_info in documents_to_update:
+                    document = update_info["document"]
+                    updates = update_info["updates"]
+
+                    # Apply updates to the document
+                    for field, value in updates.items():
+                        setattr(document, field, value)
+
+                    db.session.add(document)
+
+                # Batch commit all changes
+                db.session.commit()
+            except Exception as e:
+                # Rollback on any error
+                db.session.rollback()
+                raise e
+
+        # Execute async tasks and set Redis cache after successful commit
+        # propagation_error is used to capture any errors for submitting async task execution
+        propagation_error = None
+        for update_info in documents_to_update:
+            try:
+                # Execute async tasks after successful commit
+                if update_info["async_task"]:
+                    task_info = update_info["async_task"]
+                    task_func = task_info["function"]
+                    task_args = task_info["args"]
+                    task_func.delay(*task_args)
+            except Exception as e:
+                # Log the error but do not rollback the transaction
+                logging.exception(f"Error executing async task for document {update_info['document'].id}")
+                # don't raise the error immediately, but capture it for later
+                propagation_error = e
+
+            try:
+                # Set Redis cache if needed after successful commit
+                if update_info["set_cache"]:
+                    document = update_info["document"]
+                    indexing_cache_key = f"document_{document.id}_indexing"
+                    redis_client.setex(indexing_cache_key, 600, 1)
+            except Exception as e:
+                # Log the error but do not rollback the transaction
+                logging.exception(f"Error setting cache for document {update_info['document'].id}")
+
+        # Raise any propagation error after all updates
+        if propagation_error:
+            raise propagation_error
+
+    @staticmethod
+    def _prepare_document_status_update(document, action: str, user):
+        """
+        Prepare document status update information.
+
+        Args:
+            document: Document object to update
+            action: Action to perform
+            user: Current user
+
+        Returns:
+            dict: Update information or None if no update needed
+        """
+        now = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
+
+        if action == "enable":
+            return DocumentService._prepare_enable_update(document, now)
+        elif action == "disable":
+            return DocumentService._prepare_disable_update(document, user, now)
+        elif action == "archive":
+            return DocumentService._prepare_archive_update(document, user, now)
+        elif action == "un_archive":
+            return DocumentService._prepare_unarchive_update(document, now)
+
+        return None
+
+    @staticmethod
+    def _prepare_enable_update(document, now):
+        """Prepare updates for enabling a document."""
+        if document.enabled:
+            return None
+
+        return {
+            "document": document,
+            "updates": {"enabled": True, "disabled_at": None, "disabled_by": None, "updated_at": now},
+            "async_task": {"function": add_document_to_index_task, "args": [document.id]},
+            "set_cache": True,
+        }
+
+    @staticmethod
+    def _prepare_disable_update(document, user, now):
+        """Prepare updates for disabling a document."""
+        if not document.completed_at or document.indexing_status != "completed":
+            raise DocumentIndexingError(f"Document: {document.name} is not completed.")
+        if not document.enabled:
+            return None
+
+        return {
+            "document": document,
+            "updates": {"enabled": False, "disabled_at": now, "disabled_by": user.id, "updated_at": now},
+            "async_task": {"function": remove_document_from_index_task, "args": [document.id]},
+            "set_cache": True,
+        }
+
+    @staticmethod
+    def _prepare_archive_update(document, user, now):
+        """Prepare updates for archiving a document."""
+        if document.archived:
+            return None
+
+        update_info = {
+            "document": document,
+            "updates": {"archived": True, "archived_at": now, "archived_by": user.id, "updated_at": now},
+            "async_task": None,
+            "set_cache": False,
+        }
+
+        # Only set async task and cache if document is currently enabled
+        if document.enabled:
+            update_info["async_task"] = {"function": remove_document_from_index_task, "args": [document.id]}
+            update_info["set_cache"] = True
+
+        return update_info
+
+    @staticmethod
+    def _prepare_unarchive_update(document, now):
+        """Prepare updates for unarchiving a document."""
+        if not document.archived:
+            return None
+
+        update_info = {
+            "document": document,
+            "updates": {"archived": False, "archived_at": None, "archived_by": None, "updated_at": now},
+            "async_task": None,
+            "set_cache": False,
+        }
+
+        # Only re-index if the document is currently enabled
+        if document.enabled:
+            update_info["async_task"] = {"function": add_document_to_index_task, "args": [document.id]}
+            update_info["set_cache"] = True
+
+        return update_info
 
 
 class SegmentService:
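A minimal usage sketch for orientation while reviewing, not part of this diff. The `services.dataset_service` import path, the `dataset` / `current_user` objects, and the enclosing method name `batch_update_document_status` (its signature sits above this hunk and is not shown here) are assumptions; the call shape follows the refactored code above.

from services.dataset_service import DocumentService  # import path assumed

# Disable a batch of documents. The first pass builds one update_info dict per document
# (already-disabled documents return None and are skipped), the second pass applies the
# field updates and commits once, and only after the commit are the
# remove_document_from_index_task jobs dispatched and the indexing cache keys set.
DocumentService.batch_update_document_status(
    dataset=dataset,            # assumed Dataset instance owning the documents
    document_ids=document_ids,  # IDs of documents in that dataset
    action="disable",
    user=current_user,          # assumed acting user
)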
@@ -1,4 +1,5 @@
 import os
+from unittest.mock import MagicMock, patch
 
 import pytest
 from flask import Flask
@@ -11,6 +12,24 @@ PROJECT_DIR = os.path.abspath(os.path.join(ABS_PATH, os.pardir, os.pardir))
 
 CACHED_APP = Flask(__name__)
 
+# set global mock for Redis client
+redis_mock = MagicMock()
+redis_mock.get = MagicMock(return_value=None)
+redis_mock.setex = MagicMock()
+redis_mock.setnx = MagicMock()
+redis_mock.delete = MagicMock()
+redis_mock.lock = MagicMock()
+redis_mock.exists = MagicMock(return_value=False)
+redis_mock.set = MagicMock()
+redis_mock.expire = MagicMock()
+redis_mock.hgetall = MagicMock(return_value={})
+redis_mock.hdel = MagicMock()
+redis_mock.incr = MagicMock(return_value=1)
+
+# apply the mock to the Redis client in the Flask app
+redis_patcher = patch("extensions.ext_redis.redis_client", redis_mock)
+redis_patcher.start()
+
 
 @pytest.fixture
 def app() -> Flask:
@@ -21,3 +40,19 @@ def app() -> Flask:
 def _provide_app_context(app: Flask):
     with app.app_context():
         yield
+
+
+@pytest.fixture(autouse=True)
+def reset_redis_mock():
+    """reset the Redis mock before each test"""
+    redis_mock.reset_mock()
+    redis_mock.get.return_value = None
+    redis_mock.setex.return_value = None
+    redis_mock.setnx.return_value = None
+    redis_mock.delete.return_value = None
+    redis_mock.exists.return_value = False
+    redis_mock.set.return_value = None
+    redis_mock.expire.return_value = None
+    redis_mock.hgetall.return_value = {}
+    redis_mock.hdel.return_value = None
+    redis_mock.incr.return_value = 1
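For orientation, a sketch of the kind of unit test this Redis wiring enables (the new test module's own diff is suppressed below). The import paths, the patch target for the service's redis_client, and the method name are assumptions for illustration, not taken from this diff.

from unittest.mock import MagicMock, patch

import pytest

from services.dataset_service import DocumentService          # import path assumed
from services.errors.document import DocumentIndexingError    # error location assumed


def test_batch_update_rejects_document_that_is_being_indexed():
    dataset = MagicMock(id="dataset-1")
    document = MagicMock(id="doc-1", name="doc", enabled=True)

    # The conftest above already installs a global Redis mock; patching the service
    # module's own redis_client reference here just keeps the sketch self-contained.
    with (
        patch.object(DocumentService, "get_document", return_value=document),
        patch("services.dataset_service.redis_client") as redis_client_mock,
    ):
        redis_client_mock.get.return_value = b"1"  # pretend an indexing cache key already exists
        with pytest.raises(DocumentIndexingError):
            DocumentService.batch_update_document_status(dataset, ["doc-1"], "disable", MagicMock())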
api/tests/unit_tests/services/test_dataset_service.py (new file, 1238 lines): diff suppressed because it is too large.