From 0ea010d7eeba9badaef6cf1d6f4fd94fffcb1dcd Mon Sep 17 00:00:00 2001
From: lyzno1 <92089059+lyzno1@users.noreply.github.com>
Date: Wed, 30 Jul 2025 10:33:24 +0800
Subject: [PATCH] fix: metadata API nullable validation consistency issue
 (#23133)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 api/controllers/console/datasets/metadata.py  |   8 +-
 .../service_api/dataset/metadata.py           |   8 +-
 .../services/test_metadata_bug_complete.py    | 189 ++++++++++++++++++
 .../services/test_metadata_nullable_bug.py    | 108 ++++++++++
 4 files changed, 305 insertions(+), 8 deletions(-)
 create mode 100644 api/tests/unit_tests/services/test_metadata_bug_complete.py
 create mode 100644 api/tests/unit_tests/services/test_metadata_nullable_bug.py

diff --git a/api/controllers/console/datasets/metadata.py b/api/controllers/console/datasets/metadata.py
index b1a83aa37..65f76fb40 100644
--- a/api/controllers/console/datasets/metadata.py
+++ b/api/controllers/console/datasets/metadata.py
@@ -22,8 +22,8 @@ class DatasetMetadataCreateApi(Resource):
     @marshal_with(dataset_metadata_fields)
     def post(self, dataset_id):
         parser = reqparse.RequestParser()
-        parser.add_argument("type", type=str, required=True, nullable=True, location="json")
-        parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+        parser.add_argument("type", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
         args = parser.parse_args()
 
         metadata_args = MetadataArgs(**args)
@@ -56,7 +56,7 @@ class DatasetMetadataApi(Resource):
     @marshal_with(dataset_metadata_fields)
     def patch(self, dataset_id, metadata_id):
         parser = reqparse.RequestParser()
-        parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
         args = parser.parse_args()
 
         dataset_id_str = str(dataset_id)
@@ -127,7 +127,7 @@ class DocumentMetadataEditApi(Resource):
         DatasetService.check_dataset_permission(dataset, current_user)
 
         parser = reqparse.RequestParser()
-        parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
+        parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
         args = parser.parse_args()
 
         metadata_args = MetadataOperationData(**args)
diff --git a/api/controllers/service_api/dataset/metadata.py b/api/controllers/service_api/dataset/metadata.py
index 1968696ee..6ba818c5f 100644
--- a/api/controllers/service_api/dataset/metadata.py
+++ b/api/controllers/service_api/dataset/metadata.py
@@ -17,8 +17,8 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource):
     @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
     def post(self, tenant_id, dataset_id):
         parser = reqparse.RequestParser()
-        parser.add_argument("type", type=str, required=True, nullable=True, location="json")
-        parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+        parser.add_argument("type", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
         args = parser.parse_args()
 
         metadata_args = MetadataArgs(**args)
@@ -43,7 +43,7 @@ class DatasetMetadataServiceApi(DatasetApiResource):
     @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
     def patch(self, tenant_id, dataset_id, metadata_id):
         parser = reqparse.RequestParser()
-        parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
         args = parser.parse_args()
 
         dataset_id_str = str(dataset_id)
@@ -101,7 +101,7 @@ class DocumentMetadataEditServiceApi(DatasetApiResource):
         DatasetService.check_dataset_permission(dataset, current_user)
 
         parser = reqparse.RequestParser()
-        parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
+        parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
         args = parser.parse_args()
 
         metadata_args = MetadataOperationData(**args)
diff --git a/api/tests/unit_tests/services/test_metadata_bug_complete.py b/api/tests/unit_tests/services/test_metadata_bug_complete.py
new file mode 100644
index 000000000..c4c7579e8
--- /dev/null
+++ b/api/tests/unit_tests/services/test_metadata_bug_complete.py
@@ -0,0 +1,189 @@
+from unittest.mock import Mock, patch
+
+import pytest
+from flask_restful import reqparse
+from werkzeug.exceptions import BadRequest
+
+from services.entities.knowledge_entities.knowledge_entities import MetadataArgs
+from services.metadata_service import MetadataService
+
+
+class TestMetadataBugCompleteValidation:
+    """Complete test suite to verify the metadata nullable bug and its fix."""
+
+    def test_1_pydantic_layer_validation(self):
+        """Test Layer 1: Pydantic model validation correctly rejects None values."""
+        # Pydantic should reject None values for required fields
+        with pytest.raises((ValueError, TypeError)):
+            MetadataArgs(type=None, name=None)
+
+        with pytest.raises((ValueError, TypeError)):
+            MetadataArgs(type="string", name=None)
+
+        with pytest.raises((ValueError, TypeError)):
+            MetadataArgs(type=None, name="test")
+
+        # Valid values should work
+        valid_args = MetadataArgs(type="string", name="test_name")
+        assert valid_args.type == "string"
+        assert valid_args.name == "test_name"
+
+    def test_2_business_logic_layer_crashes_on_none(self):
+        """Test Layer 2: Business logic crashes when None values slip through."""
+        # Create mock that bypasses Pydantic validation
+        mock_metadata_args = Mock()
+        mock_metadata_args.name = None
+        mock_metadata_args.type = "string"
+
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            # Should crash with TypeError
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.create_metadata("dataset-123", mock_metadata_args)
+
+        # Test update method as well
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.update_metadata_name("dataset-123", "metadata-456", None)
+
+    def test_3_database_constraints_verification(self):
+        """Test Layer 3: Verify database model has nullable=False constraints."""
+        from sqlalchemy import inspect
+
+        from models.dataset import DatasetMetadata
+
+        # Get table info
+        mapper = inspect(DatasetMetadata)
+
+        # Check that type and name columns are not nullable
+        type_column = mapper.columns["type"]
+        name_column = mapper.columns["name"]
+
+        assert type_column.nullable is False, "type column should be nullable=False"
+        assert name_column.nullable is False, "name column should be nullable=False"
+
+    def test_4_fixed_api_layer_rejects_null(self, app):
+        """Test Layer 4: Fixed API configuration properly rejects null values."""
+        # Test Console API create endpoint (fixed)
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
+
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            with pytest.raises(BadRequest):
+                parser.parse_args()
+
+        # Test with just name being null
+        with app.test_request_context(json={"type": "string", "name": None}, content_type="application/json"):
+            with pytest.raises(BadRequest):
+                parser.parse_args()
+
+        # Test with just type being null
+        with app.test_request_context(json={"type": None, "name": "test"}, content_type="application/json"):
+            with pytest.raises(BadRequest):
+                parser.parse_args()
+
+    def test_5_fixed_api_accepts_valid_values(self, app):
+        """Test that fixed API still accepts valid non-null values."""
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
+
+        with app.test_request_context(json={"type": "string", "name": "valid_name"}, content_type="application/json"):
+            args = parser.parse_args()
+            assert args["type"] == "string"
+            assert args["name"] == "valid_name"
+
+    def test_6_simulated_buggy_behavior(self, app):
+        """Test simulating the original buggy behavior with nullable=True."""
+        # Simulate the old buggy configuration
+        buggy_parser = reqparse.RequestParser()
+        buggy_parser.add_argument("type", type=str, required=True, nullable=True, location="json")
+        buggy_parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            # This would pass in the buggy version
+            args = buggy_parser.parse_args()
+            assert args["type"] is None
+            assert args["name"] is None
+
+            # But would crash when trying to create MetadataArgs
+            with pytest.raises((ValueError, TypeError)):
+                MetadataArgs(**args)
+
+    def test_7_end_to_end_validation_layers(self):
+        """Test all validation layers work together correctly."""
+        # Layer 1: API should reject null at parameter level (with fix)
+        # Layer 2: Pydantic should reject null at model level
+        # Layer 3: Business logic expects non-null
+        # Layer 4: Database enforces non-null
+
+        # Test that valid data flows through all layers
+        valid_data = {"type": "string", "name": "test_metadata"}
+
+        # Should create valid Pydantic object
+        metadata_args = MetadataArgs(**valid_data)
+        assert metadata_args.type == "string"
+        assert metadata_args.name == "test_metadata"
+
+        # Should not crash in business logic length check
+        assert len(metadata_args.name) <= 255  # This should not crash
+        assert len(metadata_args.type) > 0  # This should not crash
+
+    def test_8_verify_specific_fix_locations(self):
+        """Verify that the specific locations mentioned in bug report are fixed."""
+        # Read the actual files to verify fixes
+        import os
+
+        # Console API create
+        console_create_file = "api/controllers/console/datasets/metadata.py"
+        if os.path.exists(console_create_file):
+            with open(console_create_file) as f:
+                content = f.read()
+                # Should contain nullable=False, not nullable=True
+                assert "nullable=True" not in content.split("class DatasetMetadataCreateApi")[1].split("class")[0]
+
+        # Service API create
+        service_create_file = "api/controllers/service_api/dataset/metadata.py"
+        if os.path.exists(service_create_file):
+            with open(service_create_file) as f:
+                content = f.read()
+                # Should contain nullable=False, not nullable=True
+                create_api_section = content.split("class DatasetMetadataCreateServiceApi")[1].split("class")[0]
+                assert "nullable=True" not in create_api_section
+
+
+class TestMetadataValidationSummary:
+    """Summary tests that demonstrate the complete validation architecture."""
+
+    def test_validation_layer_architecture(self):
+        """Document and test the 4-layer validation architecture."""
+        # Layer 1: API Parameter Validation (Flask-RESTful reqparse)
+        # - Role: First line of defense, validates HTTP request parameters
+        # - Fixed: nullable=False ensures null values are rejected at API boundary
+
+        # Layer 2: Pydantic Model Validation
+        # - Role: Validates data structure and types before business logic
+        # - Working: Required fields without Optional[] reject None values
+
+        # Layer 3: Business Logic Validation
+        # - Role: Domain-specific validation (length checks, uniqueness, etc.)
+        # - Vulnerable: Direct len() calls crash on None values
+
+        # Layer 4: Database Constraints
+        # - Role: Final data integrity enforcement
+        # - Working: nullable=False prevents None values in database
+
+        # The bug was: Layer 1 allowed None, but Layers 2-4 expected non-None
+        # The fix: Make Layer 1 consistent with Layers 2-4
+
+        assert True  # This test documents the architecture
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/api/tests/unit_tests/services/test_metadata_nullable_bug.py b/api/tests/unit_tests/services/test_metadata_nullable_bug.py
new file mode 100644
index 000000000..ef4d05c1d
--- /dev/null
+++ b/api/tests/unit_tests/services/test_metadata_nullable_bug.py
@@ -0,0 +1,108 @@
+from unittest.mock import Mock, patch
+
+import pytest
+from flask_restful import reqparse
+
+from services.entities.knowledge_entities.knowledge_entities import MetadataArgs
+from services.metadata_service import MetadataService
+
+
+class TestMetadataNullableBug:
+    """Test case to reproduce the metadata nullable validation bug."""
+
+    def test_metadata_args_with_none_values_should_fail(self):
+        """Test that MetadataArgs validation should reject None values."""
+        # This test demonstrates the expected behavior - should fail validation
+        with pytest.raises((ValueError, TypeError)):
+            # This should fail because Pydantic expects non-None values
+            MetadataArgs(type=None, name=None)
+
+    def test_metadata_service_create_with_none_name_crashes(self):
+        """Test that MetadataService.create_metadata crashes when name is None."""
+        # Mock the MetadataArgs to bypass Pydantic validation
+        mock_metadata_args = Mock()
+        mock_metadata_args.name = None  # This will cause len() to crash
+        mock_metadata_args.type = "string"
+
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            # This should crash with TypeError when calling len(None)
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.create_metadata("dataset-123", mock_metadata_args)
+
+    def test_metadata_service_update_with_none_name_crashes(self):
+        """Test that MetadataService.update_metadata_name crashes when name is None."""
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            # This should crash with TypeError when calling len(None)
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.update_metadata_name("dataset-123", "metadata-456", None)
+
+    def test_api_parser_accepts_null_values(self, app):
+        """Test that API parser configuration incorrectly accepts null values."""
+        # Simulate the current API parser configuration
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=True, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+
+        # Simulate request data with null values
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            # This should parse successfully due to nullable=True
+            args = parser.parse_args()
+
+            # Verify that null values are accepted
+            assert args["type"] is None
+            assert args["name"] is None
+
+            # This demonstrates the bug: API accepts None but business logic will crash
+
+    def test_integration_bug_scenario(self, app):
+        """Test the complete bug scenario from API to service layer."""
+        # Step 1: API parser accepts null values (current buggy behavior)
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=True, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=True, location="json")
+
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            args = parser.parse_args()
+
+            # Step 2: Try to create MetadataArgs with None values
+            # This should fail at Pydantic validation level
+            with pytest.raises((ValueError, TypeError)):
+                metadata_args = MetadataArgs(**args)
+
+        # Step 3: If we bypass Pydantic (simulating the bug scenario)
+        # Move this outside the request context to avoid Flask-Login issues
+        mock_metadata_args = Mock()
+        mock_metadata_args.name = None  # From args["name"]
+        mock_metadata_args.type = None  # From args["type"]
+
+        with patch("services.metadata_service.current_user") as mock_user:
+            mock_user.current_tenant_id = "tenant-123"
+            mock_user.id = "user-456"
+
+            # Step 4: Service layer crashes on len(None)
+            with pytest.raises(TypeError, match="object of type 'NoneType' has no len"):
+                MetadataService.create_metadata("dataset-123", mock_metadata_args)
+
+    def test_correct_nullable_false_configuration_works(self, app):
+        """Test that the correct nullable=False configuration works as expected."""
+        # This tests the FIXED configuration
+        parser = reqparse.RequestParser()
+        parser.add_argument("type", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("name", type=str, required=True, nullable=False, location="json")
+
+        with app.test_request_context(json={"type": None, "name": None}, content_type="application/json"):
+            # This should fail with BadRequest due to nullable=False
+            from werkzeug.exceptions import BadRequest
+
+            with pytest.raises(BadRequest):
+                parser.parse_args()
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])