Files
dify/api/tests/unit_tests/services/test_dataset_service.py

1239 lines
50 KiB
Python

import datetime
import unittest
# Mock redis_client before importing dataset_service
from unittest.mock import Mock, call, patch
import pytest
from models.dataset import Dataset, Document
from services.dataset_service import DocumentService
from services.errors.document import DocumentIndexingError
from tests.unit_tests.conftest import redis_mock
class TestDatasetServiceBatchUpdateDocumentStatus(unittest.TestCase):
"""
Comprehensive unit tests for DocumentService.batch_update_document_status method.
This test suite covers all supported actions (enable, disable, archive, un_archive),
error conditions, edge cases, and validates proper interaction with Redis cache,
database operations, and async task triggers.
"""
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_enable_documents_success(self, mock_datetime, mock_get_doc, mock_add_task, mock_db):
    """
    Test successful enabling of two disabled documents in a single batch.

    Verifies that:
    1. Each document ends up enabled with its disable metadata cleared
    2. ``updated_at`` is set to the patched current time
    3. The Redis indexing key is read and then set (600 seconds) for each document
    4. The add-to-index task is queued for each document
    5. Each document is added to the session and the batch is committed once
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Create two disabled, completed documents
    disabled_docs = []
    for doc_id in ("doc-1", "doc-2"):
        doc = Mock(spec=Document)
        doc.id = doc_id
        doc.name = "disabled_document.pdf"
        doc.enabled = False
        doc.archived = False
        doc.indexing_status = "completed"
        doc.completed_at = datetime.datetime.now()
        disabled_docs.append(doc)

    # Freeze the clock seen by the service module
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC

    # Document lookups return the disabled documents in order
    mock_get_doc.side_effect = disabled_docs

    # Reset module-level Redis mock; reset_mock() keeps return values, so set it explicitly
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Call the method to enable documents
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=["doc-1", "doc-2"], action="enable", user=mock_user
    )

    # Verify document attributes were updated correctly
    expected_timestamp = current_time.replace(tzinfo=None)
    for doc in disabled_docs:
        # Identity check: the flag must be the real boolean, not merely something equal to it
        assert doc.enabled is True
        # Disable metadata must be cleared
        assert doc.disabled_at is None
        assert doc.disabled_by is None
        # Update timestamp must be the frozen time
        assert doc.updated_at == expected_timestamp

    # Verify the indexing cache key was looked up for each document
    expected_cache_calls = [call("document_doc-1_indexing"), call("document_doc-2_indexing")]
    redis_mock.get.assert_has_calls(expected_cache_calls)

    # Verify Redis cache was set to prevent concurrent indexing (600 seconds)
    expected_setex_calls = [call("document_doc-1_indexing", 600, 1), call("document_doc-2_indexing", 600, 1)]
    redis_mock.setex.assert_has_calls(expected_setex_calls)

    # Verify async tasks were triggered for indexing
    expected_task_calls = [call("doc-1"), call("doc-2")]
    mock_add_task.delay.assert_has_calls(expected_task_calls)

    # Verify database adds (one add per document)
    assert mock_db.add.call_count == 2
    # Verify database commits (a single commit for the whole batch)
    assert mock_db.commit.call_count == 1
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.remove_document_from_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_disable_documents_success(self, mock_datetime, mock_get_doc, mock_remove_task, mock_db):
    """
    Test successful disabling of two enabled, completed documents in a single batch.

    Verifies that:
    1. Each document ends up disabled with ``disabled_at`` set to the patched current time
    2. The acting user's ID is recorded in ``disabled_by``
    3. ``updated_at`` is set to the patched current time
    4. The Redis indexing key is set (600 seconds) for each document
    5. The remove-from-index task is queued for each document
    6. Each document is added to the session and the batch is committed once
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Create two enabled, completed documents
    enabled_docs = []
    for doc_id in ("doc-1", "doc-2"):
        doc = Mock(spec=Document)
        doc.id = doc_id
        doc.name = "enabled_document.pdf"
        doc.enabled = True
        doc.archived = False
        doc.indexing_status = "completed"
        doc.completed_at = datetime.datetime.now()
        enabled_docs.append(doc)

    # Freeze the clock seen by the service module
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC

    # Document lookups return the enabled, completed documents in order
    mock_get_doc.side_effect = enabled_docs

    # Reset module-level Redis mock; reset_mock() keeps return values, so set it explicitly
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Call the method to disable documents
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=["doc-1", "doc-2"], action="disable", user=mock_user
    )

    # Verify document attributes were updated correctly
    expected_timestamp = current_time.replace(tzinfo=None)
    for doc in enabled_docs:
        # Identity check: the flag must be the real boolean, not merely something equal to it
        assert doc.enabled is False
        # Disable metadata must be recorded
        assert doc.disabled_at == expected_timestamp
        assert doc.disabled_by == mock_user.id
        # Update timestamp must be the frozen time
        assert doc.updated_at == expected_timestamp

    # Verify Redis cache operations for indexing prevention
    expected_setex_calls = [call("document_doc-1_indexing", 600, 1), call("document_doc-2_indexing", 600, 1)]
    redis_mock.setex.assert_has_calls(expected_setex_calls)

    # Verify async tasks were triggered to remove from index
    expected_task_calls = [call("doc-1"), call("doc-2")]
    mock_remove_task.delay.assert_has_calls(expected_task_calls)

    # Verify database adds (one add per document)
    assert mock_db.add.call_count == 2
    # Verify database commits (a single commit for the whole batch)
    assert mock_db.commit.call_count == 1
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.remove_document_from_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_archive_documents_success(self, mock_datetime, mock_get_doc, mock_remove_task, mock_db):
    """
    Test successful archiving of an enabled, unarchived document.

    Verifies that:
    1. The document ends up archived with ``archived_at`` set to the patched current time
    2. The acting user's ID is recorded in ``archived_by``
    3. ``updated_at`` is set to the patched current time
    4. The Redis indexing key is set (the document is enabled)
    5. The remove-from-index task is queued (the document is enabled)
    6. The document is added to the session and committed once
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Create unarchived enabled document; attributes are set explicitly so the
    # test can read back plain values rather than auto-created child mocks
    unarchived_doc = Mock(spec=Document)
    unarchived_doc.id = "doc-1"
    unarchived_doc.name = "unarchived_document.pdf"
    unarchived_doc.enabled = True
    unarchived_doc.archived = False

    # Freeze the clock seen by the service module
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC

    mock_get_doc.return_value = unarchived_doc

    # Reset module-level Redis mock; reset_mock() keeps return values, so set it explicitly
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Call the method to archive documents
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=["doc-1"], action="archive", user=mock_user
    )

    # Verify document attributes were updated correctly
    expected_timestamp = current_time.replace(tzinfo=None)
    # Identity check: the flag must be the real boolean, not merely something equal to it
    assert unarchived_doc.archived is True
    assert unarchived_doc.archived_at == expected_timestamp
    assert unarchived_doc.archived_by == mock_user.id
    assert unarchived_doc.updated_at == expected_timestamp

    # Verify Redis cache was set (because document was enabled)
    redis_mock.setex.assert_called_once_with("document_doc-1_indexing", 600, 1)

    # Verify async task was triggered to remove from index (because enabled)
    mock_remove_task.delay.assert_called_once_with("doc-1")

    # Verify database add
    mock_db.add.assert_called_once()
    # Verify database commit
    mock_db.commit.assert_called_once()
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_unarchive_documents_success(self, mock_datetime, mock_get_doc, mock_add_task, mock_db):
    """
    Test successful unarchiving of an enabled, archived document.

    Verifies that:
    1. The document ends up unarchived with ``archived_at`` / ``archived_by`` cleared
    2. ``updated_at`` is set to the patched current time
    3. The Redis indexing key is set (the document is enabled)
    4. The add-to-index task is queued (the document is enabled)
    5. The document is added to the session and committed once
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Create mock archived (but enabled) document
    mock_archived_doc = Mock(spec=Document)
    mock_archived_doc.id = "doc-3"
    mock_archived_doc.name = "archived_document.pdf"
    mock_archived_doc.enabled = True
    mock_archived_doc.archived = True
    mock_archived_doc.indexing_status = "completed"
    mock_archived_doc.completed_at = datetime.datetime.now()

    # Freeze the clock seen by the service module
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC

    mock_get_doc.return_value = mock_archived_doc

    # Reset module-level Redis mock; reset_mock() keeps return values, so set it explicitly
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Call the method to unarchive documents
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=["doc-3"], action="un_archive", user=mock_user
    )

    # Verify document attributes were updated correctly
    # Identity check: the flag must be the real boolean, not merely something equal to it
    assert mock_archived_doc.archived is False
    assert mock_archived_doc.archived_at is None
    assert mock_archived_doc.archived_by is None
    assert mock_archived_doc.updated_at == current_time.replace(tzinfo=None)

    # Verify Redis cache was set (because document is enabled)
    redis_mock.setex.assert_called_once_with("document_doc-3_indexing", 600, 1)

    # Verify async task was triggered to add back to index (because enabled)
    mock_add_task.delay.assert_called_once_with("doc-3")

    # Verify database add
    mock_db.add.assert_called_once()
    # Verify database commit
    mock_db.commit.assert_called_once()
@patch("services.dataset_service.DocumentService.get_document")
def test_batch_update_document_indexing_error_redis_cache_hit(self, mock_get_doc):
    """
    Test that DocumentIndexingError is raised when the Redis indexing key is present.

    Verifies that:
    1. DocumentIndexingError is raised when the cache lookup returns a value
    2. The error message names the document and says it is being indexed
    3. The cache was queried exactly once with the document's indexing key
    """
    # Dataset and acting user
    dataset = Mock(spec=Dataset, id="dataset-123", tenant_id="tenant-456")
    user = Mock(id="user-789")

    # An enabled, completed document
    document = Mock(
        spec=Document,
        id="doc-1",
        enabled=True,
        archived=False,
        indexing_status="completed",
        completed_at=datetime.datetime.now(),
    )
    # ``name`` is reserved by the Mock constructor, so it is assigned afterwards
    document.name = "enabled_document.pdf"
    mock_get_doc.return_value = document

    # Reset module-level Redis mock and make the cache lookup report a value
    redis_mock.reset_mock()
    redis_mock.get.return_value = "indexing"

    # The call must be rejected
    with pytest.raises(DocumentIndexingError) as exc_info:
        DocumentService.batch_update_document_status(
            dataset=dataset, document_ids=["doc-1"], action="enable", user=user
        )

    # The message carries the document name and the reason
    message = str(exc_info.value)
    assert "enabled_document.pdf" in message
    assert "is being indexed" in message

    # The cache was checked with the expected key
    redis_mock.get.assert_called_once_with("document_doc-1_indexing")
@patch("services.dataset_service.DocumentService.get_document")
def test_batch_update_disable_non_completed_document_error(self, mock_get_doc):
    """
    Test that DocumentIndexingError is raised when disabling a non-completed document.

    Verifies that:
    1. DocumentIndexingError is raised for a document whose indexing is not completed
    2. The error message indicates the document is not completed
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Create a document that's not completed
    non_completed_doc = Mock(spec=Document)
    non_completed_doc.id = "doc-1"
    non_completed_doc.name = "indexing_document.pdf"
    non_completed_doc.enabled = True
    non_completed_doc.indexing_status = "indexing"  # Not completed
    non_completed_doc.completed_at = None  # Not completed
    mock_get_doc.return_value = non_completed_doc

    # Reset module-level Redis mock like the sibling tests do, so that state left on
    # the shared mock by another test (e.g. a non-None ``get`` result) cannot
    # influence which error this test observes. reset_mock() keeps return values,
    # hence the explicit assignment.
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Verify that DocumentIndexingError is raised
    with pytest.raises(DocumentIndexingError) as exc_info:
        DocumentService.batch_update_document_status(
            dataset=mock_dataset, document_ids=["doc-1"], action="disable", user=mock_user
        )

    # Verify error message indicates document is not completed
    assert "is not completed" in str(exc_info.value)
@patch("services.dataset_service.DocumentService.get_document")
def test_batch_update_empty_document_list(self, mock_get_doc):
    """
    Test a batch operation invoked with an empty document ID list.

    Verifies that:
    1. No error is raised for empty input
    2. No document lookups are performed
    3. The method returns None
    """
    # Dataset and acting user
    dataset = Mock(spec=Dataset, id="dataset-123", tenant_id="tenant-456")
    user = Mock(id="user-789")

    # Invoke with nothing to process
    outcome = DocumentService.batch_update_document_status(
        dataset=dataset, document_ids=[], action="enable", user=user
    )

    # No lookup should have happened
    mock_get_doc.assert_not_called()

    # Nothing is returned
    assert outcome is None
@patch("services.dataset_service.DocumentService.get_document")
def test_batch_update_document_not_found_skipped(self, mock_get_doc):
    """
    Test behavior when a requested document does not exist.

    Verifies that:
    1. No exception is raised when the lookup returns None
    2. The lookup was attempted once with the dataset ID and the requested document ID
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Mock document service to return None (document not found)
    mock_get_doc.return_value = None

    # Call method with non-existent document ID. Any exception raised here fails
    # the test on its own and keeps the original traceback, so no try/except
    # wrapper is needed.
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=["non-existent-doc"], action="enable", user=mock_user
    )

    # Verify document lookup was attempted
    mock_get_doc.assert_called_once_with(mock_dataset.id, "non-existent-doc")
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.DocumentService.get_document")
def test_batch_update_enable_already_enabled_document_skipped(self, mock_get_doc, mock_db):
    """
    Test enabling a document that is already enabled.

    Verifies that:
    1. The call completes without raising
    2. No database commit occurs
    3. No Redis ``setex`` call occurs
    """
    # Dataset and acting user
    dataset = Mock(spec=Dataset, id="dataset-123", tenant_id="tenant-456")
    user = Mock(id="user-789")

    # A document that is already enabled
    already_enabled = Mock(
        spec=Document,
        id="doc-1",
        enabled=True,
        archived=False,
        indexing_status="completed",
        completed_at=datetime.datetime.now(),
    )
    # ``name`` is reserved by the Mock constructor, so it is assigned afterwards
    already_enabled.name = "enabled_document.pdf"
    mock_get_doc.return_value = already_enabled

    # Reset module-level Redis mock
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Ask to enable what is already enabled
    DocumentService.batch_update_document_status(
        dataset=dataset, document_ids=["doc-1"], action="enable", user=user
    )

    # Nothing was committed (document was skipped)
    mock_db.commit.assert_not_called()

    # No indexing key was written (document was skipped)
    redis_mock.setex.assert_not_called()
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.DocumentService.get_document")
def test_batch_update_archive_already_archived_document_skipped(self, mock_get_doc, mock_db):
    """
    Test archiving a document that is already archived.

    Verifies that:
    1. The call completes without raising
    2. No database commit occurs
    3. No Redis ``setex`` call occurs
    """
    # Dataset and acting user
    dataset = Mock(spec=Dataset, id="dataset-123", tenant_id="tenant-456")
    user = Mock(id="user-789")

    # A document that is already archived
    already_archived = Mock(
        spec=Document,
        id="doc-3",
        enabled=True,
        archived=True,
        indexing_status="completed",
        completed_at=datetime.datetime.now(),
    )
    # ``name`` is reserved by the Mock constructor, so it is assigned afterwards
    already_archived.name = "archived_document.pdf"
    mock_get_doc.return_value = already_archived

    # Reset module-level Redis mock
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Ask to archive what is already archived
    DocumentService.batch_update_document_status(
        dataset=dataset, document_ids=["doc-3"], action="archive", user=user
    )

    # Nothing was committed (document was skipped)
    mock_db.commit.assert_not_called()

    # No indexing key was written (document was skipped)
    redis_mock.setex.assert_not_called()
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.remove_document_from_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_mixed_document_states_and_actions(
    self, mock_datetime, mock_get_doc, mock_remove_task, mock_add_task, mock_db
):
    """
    Test an "enable" batch over documents in different starting states.

    The batch holds a disabled document, an already enabled document and an
    archived-but-enabled document.

    Verifies that:
    1. Exactly one document is added to the session and one commit occurs
    2. The Redis indexing key is set only for the previously disabled document
    3. The add-to-index task is queued only for the previously disabled document
    """

    def build_document(doc_id, file_name, *, enabled, archived):
        """Return a completed Document mock with the given identity and flags."""
        doc = Mock(
            spec=Document,
            id=doc_id,
            enabled=enabled,
            archived=archived,
            indexing_status="completed",
            completed_at=datetime.datetime.now(),
        )
        # ``name`` is reserved by the Mock constructor, so it is assigned afterwards
        doc.name = file_name
        return doc

    # Dataset and acting user
    dataset = Mock(spec=Dataset, id="dataset-123", tenant_id="tenant-456")
    user = Mock(id="user-789")

    # Freeze the clock seen by the service module
    frozen_now = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = frozen_now
    mock_datetime.UTC = datetime.UTC

    # Lookups return the documents in request order
    mock_get_doc.side_effect = [
        # Will be enabled
        build_document("doc-1", "disabled_document.pdf", enabled=False, archived=False),
        # Already enabled, will be skipped
        build_document("doc-2", "enabled_document.pdf", enabled=True, archived=False),
        # Archived but enabled, will be skipped for enable action
        build_document("doc-3", "archived_document.pdf", enabled=True, archived=True),
    ]

    # Reset module-level Redis mock
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Run the enable action across all three documents
    DocumentService.batch_update_document_status(
        dataset=dataset, document_ids=["doc-1", "doc-2", "doc-3"], action="enable", user=user
    )

    # Only the disabled document should have been processed
    # One add (for the document that was enabled)
    mock_db.add.assert_called_once()
    # One commit
    mock_db.commit.assert_called_once()
    # One indexing key written (for the document that was enabled)
    redis_mock.setex.assert_called_once_with("document_doc-1_indexing", 600, 1)
    # One indexing task queued (for the document that was enabled)
    mock_add_task.delay.assert_called_once_with("doc-1")
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.remove_document_from_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_archive_disabled_document_no_index_removal(
    self, mock_datetime, mock_get_doc, mock_remove_task, mock_db
):
    """
    Test archiving a disabled document (should not trigger index removal).

    Verifies that:
    1. The disabled document ends up archived
    2. ``archived_at`` / ``archived_by`` are set to the patched time and the acting user
    3. No Redis indexing key is set
    4. No remove-from-index task is queued
    5. The document is still added to the session and committed once
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Freeze the clock seen by the service module
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC

    # Set up disabled, unarchived document; attributes are set explicitly so the
    # test can read back plain values rather than auto-created child mocks
    disabled_unarchived_doc = Mock(spec=Document)
    disabled_unarchived_doc.id = "doc-1"
    disabled_unarchived_doc.name = "disabled_document.pdf"
    disabled_unarchived_doc.enabled = False  # Disabled
    disabled_unarchived_doc.archived = False  # Not archived
    mock_get_doc.return_value = disabled_unarchived_doc

    # Reset module-level Redis mock; reset_mock() keeps return values, so set it explicitly
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Archive the disabled document
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=["doc-1"], action="archive", user=mock_user
    )

    # Verify document was archived
    # Identity check: the flag must be the real boolean, not merely something equal to it
    assert disabled_unarchived_doc.archived is True
    assert disabled_unarchived_doc.archived_at == current_time.replace(tzinfo=None)
    assert disabled_unarchived_doc.archived_by == mock_user.id

    # Verify no Redis cache was set (document is disabled)
    redis_mock.setex.assert_not_called()

    # Verify no index removal task was triggered (document is disabled)
    mock_remove_task.delay.assert_not_called()

    # Verify database add still occurred
    mock_db.add.assert_called_once()
    # Verify database commit still occurred
    mock_db.commit.assert_called_once()
@patch("services.dataset_service.DocumentService.get_document")
def test_batch_update_invalid_action_error(self, mock_get_doc):
    """
    Test that an unsupported action is rejected with ValueError.

    Verifies that:
    1. ValueError is raised for an unknown action
    2. The error message contains "Invalid action" and the offending action name
    3. No Redis ``setex`` call occurs
    """
    # Dataset and acting user
    dataset = Mock(spec=Dataset, id="dataset-123", tenant_id="tenant-456")
    user = Mock(id="user-789")

    # A document the lookup would return
    document = Mock(spec=Document, id="doc-1", enabled=True, archived=False)
    # ``name`` is reserved by the Mock constructor, so it is assigned afterwards
    document.name = "test_document.pdf"
    mock_get_doc.return_value = document

    # Reset module-level Redis mock
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Call with an action the service does not support
    bad_action = "invalid_action"
    with pytest.raises(ValueError) as exc_info:
        DocumentService.batch_update_document_status(
            dataset=dataset, document_ids=["doc-1"], action=bad_action, user=user
        )

    # The message identifies the problem and the action
    message = str(exc_info.value)
    assert bad_action in message
    assert "Invalid action" in message

    # No indexing key was written
    redis_mock.setex.assert_not_called()
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.remove_document_from_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_disable_already_disabled_document_skipped(
    self, mock_datetime, mock_get_doc, mock_remove_task, mock_add_task, mock_db
):
    """
    Test disabling a document that is already disabled.

    Verifies that:
    1. The call completes without raising
    2. No database commit occurs
    3. No Redis ``setex`` call occurs
    4. Neither the remove-from-index nor the add-to-index task is queued
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Create mock disabled document
    mock_disabled_doc = Mock(spec=Document)
    mock_disabled_doc.id = "doc-1"
    mock_disabled_doc.name = "disabled_document.pdf"
    mock_disabled_doc.enabled = False  # Already disabled
    mock_disabled_doc.archived = False
    mock_disabled_doc.indexing_status = "completed"
    mock_disabled_doc.completed_at = datetime.datetime.now()

    # Mock document that is already disabled
    mock_get_doc.return_value = mock_disabled_doc

    # Reset module-level Redis mock; reset_mock() keeps return values, so set it explicitly
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Attempt to disable already disabled document
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=["doc-1"], action="disable", user=mock_user
    )

    # Verify no database operations occurred (document was skipped)
    mock_db.commit.assert_not_called()

    # Verify no Redis setex operations occurred (document was skipped)
    redis_mock.setex.assert_not_called()

    # Verify no async tasks were triggered (document was skipped). The successful
    # disable test in this file expects the remove-from-index task, so that is the
    # one that must be watched here; the add-to-index check is kept as an extra guard.
    mock_remove_task.delay.assert_not_called()
    mock_add_task.delay.assert_not_called()
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_unarchive_already_unarchived_document_skipped(
    self, mock_datetime, mock_get_doc, mock_add_task, mock_db
):
    """
    Test unarchiving a document that is not archived.

    Verifies that:
    1. The call completes without raising
    2. No database commit occurs
    3. No Redis ``setex`` call occurs
    4. The add-to-index task is not queued
    """
    # Dataset and acting user
    dataset = Mock(spec=Dataset, id="dataset-123", tenant_id="tenant-456")
    user = Mock(id="user-789")

    # A document that is already unarchived
    not_archived = Mock(
        spec=Document,
        id="doc-1",
        enabled=True,
        archived=False,
        indexing_status="completed",
        completed_at=datetime.datetime.now(),
    )
    # ``name`` is reserved by the Mock constructor, so it is assigned afterwards
    not_archived.name = "unarchived_document.pdf"
    mock_get_doc.return_value = not_archived

    # Reset module-level Redis mock
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Ask to unarchive what is not archived
    DocumentService.batch_update_document_status(
        dataset=dataset, document_ids=["doc-1"], action="un_archive", user=user
    )

    # Nothing was committed (document was skipped)
    mock_db.commit.assert_not_called()

    # No indexing key was written (document was skipped)
    redis_mock.setex.assert_not_called()

    # No indexing task was queued (document was skipped)
    mock_add_task.delay.assert_not_called()
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_unarchive_disabled_document_no_index_addition(
    self, mock_datetime, mock_get_doc, mock_add_task, mock_db
):
    """
    Test unarchiving a disabled document (should not trigger index addition).

    Verifies that:
    1. A disabled document can be unarchived
    2. Archive metadata (archived, archived_at, archived_by) is cleared
       and updated_at is refreshed
    3. No index addition task is triggered (because document is disabled)
    4. No Redis cache key is set (because document is disabled)
    5. Database add and commit still occur
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Create mock archived but disabled document
    mock_archived_disabled_doc = Mock(spec=Document)
    mock_archived_disabled_doc.id = "doc-1"
    mock_archived_disabled_doc.name = "archived_disabled_document.pdf"
    mock_archived_disabled_doc.enabled = False  # Disabled
    mock_archived_disabled_doc.archived = True  # Archived
    mock_archived_disabled_doc.indexing_status = "completed"
    mock_archived_disabled_doc.completed_at = datetime.datetime.now()

    # Freeze the service's clock so updated_at can be compared exactly
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC
    mock_get_doc.return_value = mock_archived_disabled_doc

    # Reset module-level Redis mock; a None lookup means "not currently indexing"
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Unarchive the disabled document
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=["doc-1"], action="un_archive", user=mock_user
    )

    # Verify document was unarchived.
    # Identity checks ensure a real boolean/None was assigned, not a Mock attribute.
    assert mock_archived_disabled_doc.archived is False
    assert mock_archived_disabled_doc.archived_at is None
    assert mock_archived_disabled_doc.archived_by is None
    assert mock_archived_disabled_doc.updated_at == current_time.replace(tzinfo=None)

    # Verify no Redis cache was set (document is disabled)
    redis_mock.setex.assert_not_called()

    # Verify no index addition task was triggered (document is disabled)
    mock_add_task.delay.assert_not_called()

    # Verify database add still occurred
    mock_db.add.assert_called_once()

    # Verify database commit still occurred
    mock_db.commit.assert_called_once()
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_async_task_error_handling(self, mock_datetime, mock_get_doc, mock_add_task, mock_db):
    """
    Test handling of async task errors during batch operations.

    Verifies that:
    1. An exception raised while dispatching the async task is propagated to the caller
    2. The database add and commit are still performed
    3. The Redis indexing cache key is still set
    4. The document attributes are still updated (enabled, disabled metadata cleared)
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Create mock disabled document
    mock_disabled_doc = Mock(spec=Document)
    mock_disabled_doc.id = "doc-1"
    mock_disabled_doc.name = "disabled_document.pdf"
    mock_disabled_doc.enabled = False
    mock_disabled_doc.archived = False
    mock_disabled_doc.indexing_status = "completed"
    mock_disabled_doc.completed_at = datetime.datetime.now()

    # Freeze the service's clock
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC
    mock_get_doc.return_value = mock_disabled_doc

    # Mock async task dispatch to raise an exception
    mock_add_task.delay.side_effect = Exception("Celery task error")

    # Reset module-level Redis mock; a None lookup means "not currently indexing"
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Verify that the async task error is propagated with its original message.
    # `match` pins the message so an unrelated exception cannot satisfy the check.
    with pytest.raises(Exception, match="Celery task error"):
        DocumentService.batch_update_document_status(
            dataset=mock_dataset, document_ids=["doc-1"], action="enable", user=mock_user
        )

    # Verify database operations completed successfully
    mock_db.add.assert_called_once()
    mock_db.commit.assert_called_once()

    # Verify Redis cache was set successfully
    redis_mock.setex.assert_called_once_with("document_doc-1_indexing", 600, 1)

    # Verify document was updated
    assert mock_disabled_doc.enabled is True
    assert mock_disabled_doc.disabled_at is None
    assert mock_disabled_doc.disabled_by is None
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_large_document_list_performance(self, mock_datetime, mock_get_doc, mock_add_task, mock_db):
    """
    Test batch operations with a large number of documents.

    Verifies that:
    1. Every document ID in a large list is looked up
    2. All documents are updated correctly
    3. Each document is added to the session once and the batch is committed once
    4. A Redis cache key is set for each document, in input order
    5. An async indexing task is triggered for each document, in input order

    Note: no timing is measured here; this only checks correctness at scale.
    """
    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Single source of truth for the batch size used throughout the test
    document_count = 100
    document_numbers = range(1, document_count + 1)

    # Create large list of document IDs
    document_ids = [f"doc-{i}" for i in document_numbers]

    # Create mock documents, one per ID, all disabled so each one gets enabled
    mock_documents = []
    for i in document_numbers:
        mock_doc = Mock(spec=Document)
        mock_doc.id = f"doc-{i}"
        mock_doc.name = f"document_{i}.pdf"
        mock_doc.enabled = False
        mock_doc.archived = False
        mock_doc.indexing_status = "completed"
        mock_doc.completed_at = datetime.datetime.now()
        mock_documents.append(mock_doc)

    # Freeze the service's clock so updated_at can be compared exactly
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC
    # Successive get_document calls return the documents in order
    mock_get_doc.side_effect = mock_documents

    # Reset module-level Redis mock; a None lookup means "not currently indexing"
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Perform batch enable operation
    DocumentService.batch_update_document_status(
        dataset=mock_dataset, document_ids=document_ids, action="enable", user=mock_user
    )

    # Verify all documents were processed
    assert mock_get_doc.call_count == document_count

    # Verify all documents were updated
    expected_updated_at = current_time.replace(tzinfo=None)
    for mock_doc in mock_documents:
        assert mock_doc.enabled is True
        assert mock_doc.disabled_at is None
        assert mock_doc.disabled_by is None
        assert mock_doc.updated_at == expected_updated_at

    # Verify database adds: one add per document
    assert mock_db.add.call_count == document_count

    # Verify database commits: a single commit for the whole batch
    assert mock_db.commit.call_count == 1

    # Verify Redis cache operations occurred for each document
    assert redis_mock.setex.call_count == document_count

    # Verify async tasks were triggered for each document
    assert mock_add_task.delay.call_count == document_count

    # Verify exactly the expected Redis cache keys were set, in order
    expected_redis_calls = [call(f"document_{document_id}_indexing", 600, 1) for document_id in document_ids]
    assert redis_mock.setex.call_args_list == expected_redis_calls

    # Verify exactly the expected async task calls were made, in order
    expected_task_calls = [call(document_id) for document_id in document_ids]
    assert mock_add_task.delay.call_args_list == expected_task_calls
@patch("extensions.ext_database.db.session")
@patch("services.dataset_service.add_document_to_index_task")
@patch("services.dataset_service.DocumentService.get_document")
@patch("services.dataset_service.datetime")
def test_batch_update_mixed_document_states_complex_scenario(
    self, mock_datetime, mock_get_doc, mock_add_task, mock_db
):
    """
    Test a single "enable" batch over documents in various states.

    Verifies that:
    1. Only the disabled document (doc-1) is enabled
    2. Documents that are already enabled (doc-2 to doc-5, including the
       archived doc-5) are left unchanged
    3. A non-existent document (doc-6) is looked up and skipped without error
    4. Exactly one database add and one commit occur
    5. The Redis cache key and the indexing task are issued only for doc-1
    """

    def make_document(doc_id, name, *, enabled, archived):
        # Build a completed Document mock in the requested enabled/archived state
        doc = Mock(spec=Document)
        doc.id = doc_id
        doc.name = name
        doc.enabled = enabled
        doc.archived = archived
        doc.indexing_status = "completed"
        doc.completed_at = datetime.datetime.now()
        return doc

    # Create mock dataset
    mock_dataset = Mock(spec=Dataset)
    mock_dataset.id = "dataset-123"
    mock_dataset.tenant_id = "tenant-456"

    # Create mock user
    mock_user = Mock()
    mock_user.id = "user-789"

    # Freeze the service's clock
    current_time = datetime.datetime(2023, 1, 1, 12, 0, 0)
    mock_datetime.datetime.now.return_value = current_time
    mock_datetime.UTC = datetime.UTC

    # Document 1: Disabled, will be enabled
    doc1 = make_document("doc-1", "disabled_doc.pdf", enabled=False, archived=False)
    # Document 2: Already enabled, expected to be skipped
    doc2 = make_document("doc-2", "enabled_doc.pdf", enabled=True, archived=False)
    # Document 3: Already enabled and completed, expected to be skipped
    doc3 = make_document("doc-3", "enabled_completed_doc.pdf", enabled=True, archived=False)
    # Document 4: Already enabled and unarchived, expected to be skipped
    doc4 = make_document("doc-4", "unarchived_doc.pdf", enabled=True, archived=False)
    # Document 5: Already enabled but archived, expected to be skipped by "enable"
    doc5 = make_document("doc-5", "archived_doc.pdf", enabled=True, archived=True)
    # Document 6: Non-existent (lookup returns None), expected to be skipped
    doc6 = None

    # Successive get_document calls return the documents in order
    mock_get_doc.side_effect = [doc1, doc2, doc3, doc4, doc5, doc6]

    # Reset module-level Redis mock; a None lookup means "not currently indexing"
    redis_mock.reset_mock()
    redis_mock.get.return_value = None

    # Perform the batch operation with a single action over all documents
    DocumentService.batch_update_document_status(
        dataset=mock_dataset,
        document_ids=["doc-1", "doc-2", "doc-3", "doc-4", "doc-5", "doc-6"],
        action="enable",  # Only doc1 is disabled, so only doc1 should change
        user=mock_user,
    )

    # Verify every ID was looked up, including the non-existent doc-6
    assert mock_get_doc.call_count == 6

    # Verify document 1 was enabled
    assert doc1.enabled is True
    assert doc1.disabled_at is None
    assert doc1.disabled_by is None

    # Verify documents 2-5 were left enabled (no change)
    for skipped_doc in (doc2, doc3, doc4, doc5):
        assert skipped_doc.enabled is True

    # Verify database operations occurred only for the processed document.
    # Only doc1 should be added (doc2-doc5 were skipped, doc6 doesn't exist).
    assert mock_db.add.call_count == 1
    assert mock_db.commit.call_count == 1

    # Verify the Redis cache key was set exactly once, for doc1 only
    redis_mock.setex.assert_called_once_with("document_doc-1_indexing", 600, 1)

    # Verify the async indexing task was triggered exactly once, for doc1 only
    mock_add_task.delay.assert_called_once_with("doc-1")