From 366ddb05aecd4b9fe7200a18e552b4702e4623b7 Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Mon, 16 Jun 2025 11:05:19 +0800 Subject: [PATCH] test: run vdb test of oceanbase with docker compose in CI tests (#20945) --- .github/workflows/expose_service_ports.sh | 1 + .github/workflows/vdb-tests.yml | 16 ++++-- .../vdb/oceanbase/oceanbase_vector.py | 33 +++++++------ .../vdb/oceanbase/check_oceanbase_ready.py | 49 +++++++++++++++++++ .../vdb/oceanbase/test_oceanbase.py | 39 ++------------- docker/docker-compose-template.yaml | 6 +-- docker/docker-compose.yaml | 6 +-- 7 files changed, 89 insertions(+), 61 deletions(-) create mode 100644 api/tests/integration_tests/vdb/oceanbase/check_oceanbase_ready.py diff --git a/.github/workflows/expose_service_ports.sh b/.github/workflows/expose_service_ports.sh index 10d95cb73..01772ccf9 100755 --- a/.github/workflows/expose_service_ports.sh +++ b/.github/workflows/expose_service_ports.sh @@ -10,6 +10,7 @@ yq eval '.services["elasticsearch"].ports += ["9200:9200"]' -i docker/docker-com yq eval '.services.couchbase-server.ports += ["8091-8096:8091-8096"]' -i docker/docker-compose.yaml yq eval '.services.couchbase-server.ports += ["11210:11210"]' -i docker/docker-compose.yaml yq eval '.services.tidb.ports += ["4000:4000"]' -i docker/tidb/docker-compose.yaml +yq eval '.services.oceanbase.ports += ["2881:2881"]' -i docker/docker-compose.yaml yq eval '.services.opengauss.ports += ["6600:6600"]' -i docker/docker-compose.yaml echo "Ports exposed for sandbox, weaviate, tidb, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase, opengauss" diff --git a/.github/workflows/vdb-tests.yml b/.github/workflows/vdb-tests.yml index c784817e7..512d14b2e 100644 --- a/.github/workflows/vdb-tests.yml +++ b/.github/workflows/vdb-tests.yml @@ -31,6 +31,13 @@ jobs: with: persist-credentials: false + - name: Free Disk Space + uses: endersonmenezes/free-disk-space@v2 + with: + remove_dotnet: true + remove_haskell: true + remove_tool_cache: true + - name: Setup UV and Python uses: ./.github/actions/setup-uv with: @@ -59,7 +66,7 @@ jobs: tidb tiflash - - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase) + - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase, OceanBase) uses: hoverkraft-tech/compose-action@v2.0.2 with: compose-file: | @@ -75,9 +82,12 @@ jobs: pgvector chroma elasticsearch + oceanbase - - name: Check TiDB Ready - run: uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py + - name: Check VDB Ready (TiDB, Oceanbase) + run: | + uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py + uv run --project api python api/tests/integration_tests/vdb/oceanbase/check_oceanbase_ready.py - name: Test Vector Stores run: uv run --project api bash dev/pytest/pytest_vdb.sh diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index 2b47d179d..dd196e1f0 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -80,6 +80,23 @@ class OceanBaseVector(BaseVector): self.delete() + vals = [] + params = self._client.perform_raw_text_sql("SHOW PARAMETERS LIKE '%ob_vector_memory_limit_percentage%'") + for row in params: + val = int(row[6]) + vals.append(val) + if len(vals) == 0: + raise ValueError("ob_vector_memory_limit_percentage not found in parameters.") + if any(val == 0 for val in vals): + try: + self._client.perform_raw_text_sql("ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30") + except Exception as e: + raise Exception( + "Failed to set ob_vector_memory_limit_percentage. " + + "Maybe the database user has insufficient privilege.", + e, + ) + cols = [ Column("id", String(36), primary_key=True, autoincrement=False), Column("vector", VECTOR(self._vec_dim)), @@ -110,22 +127,6 @@ class OceanBaseVector(BaseVector): + "to support fulltext index and vector index in the same table", e, ) - vals = [] - params = self._client.perform_raw_text_sql("SHOW PARAMETERS LIKE '%ob_vector_memory_limit_percentage%'") - for row in params: - val = int(row[6]) - vals.append(val) - if len(vals) == 0: - raise ValueError("ob_vector_memory_limit_percentage not found in parameters.") - if any(val == 0 for val in vals): - try: - self._client.perform_raw_text_sql("ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30") - except Exception as e: - raise Exception( - "Failed to set ob_vector_memory_limit_percentage. " - + "Maybe the database user has insufficient privilege.", - e, - ) redis_client.set(collection_exist_cache_key, 1, ex=3600) def _check_hybrid_search_support(self) -> bool: diff --git a/api/tests/integration_tests/vdb/oceanbase/check_oceanbase_ready.py b/api/tests/integration_tests/vdb/oceanbase/check_oceanbase_ready.py new file mode 100644 index 000000000..94a51292f --- /dev/null +++ b/api/tests/integration_tests/vdb/oceanbase/check_oceanbase_ready.py @@ -0,0 +1,49 @@ +import time + +import pymysql + + +def check_oceanbase_ready() -> bool: + try: + connection = pymysql.connect( + host="localhost", + port=2881, + user="root", + password="difyai123456", + ) + affected_rows = connection.query("SELECT 1") + return affected_rows == 1 + except Exception as e: + print(f"Oceanbase is not ready. Exception: {e}") + return False + finally: + if connection: + connection.close() + + +def main(): + max_attempts = 50 + retry_interval_seconds = 2 + is_oceanbase_ready = False + for attempt in range(max_attempts): + try: + is_oceanbase_ready = check_oceanbase_ready() + except Exception as e: + print(f"Oceanbase is not ready. Exception: {e}") + is_oceanbase_ready = False + + if is_oceanbase_ready: + break + else: + print(f"Attempt {attempt + 1} failed, retry in {retry_interval_seconds} seconds...") + time.sleep(retry_interval_seconds) + + if is_oceanbase_ready: + print("Oceanbase is ready.") + else: + print(f"Oceanbase is not ready after {max_attempts} attempting checks.") + exit(1) + + +if __name__ == "__main__": + main() diff --git a/api/tests/integration_tests/vdb/oceanbase/test_oceanbase.py b/api/tests/integration_tests/vdb/oceanbase/test_oceanbase.py index ebcb13416..8fbbbe61b 100644 --- a/api/tests/integration_tests/vdb/oceanbase/test_oceanbase.py +++ b/api/tests/integration_tests/vdb/oceanbase/test_oceanbase.py @@ -1,15 +1,11 @@ -from unittest.mock import MagicMock, patch - import pytest from core.rag.datasource.vdb.oceanbase.oceanbase_vector import ( OceanBaseVector, OceanBaseVectorConfig, ) -from tests.integration_tests.vdb.__mock.tcvectordb import setup_tcvectordb_mock from tests.integration_tests.vdb.test_vector_store import ( AbstractVectorTest, - get_example_text, setup_mock_redis, ) @@ -20,10 +16,11 @@ def oceanbase_vector(): "dify_test_collection", config=OceanBaseVectorConfig( host="127.0.0.1", - port="2881", - user="root@test", + port=2881, + user="root", database="test", - password="test", + password="difyai123456", + enable_hybrid_search=True, ), ) @@ -33,39 +30,13 @@ class OceanBaseVectorTest(AbstractVectorTest): super().__init__() self.vector = vector - def search_by_vector(self): - hits_by_vector = self.vector.search_by_vector(query_vector=self.example_embedding) - assert len(hits_by_vector) == 0 - - def search_by_full_text(self): - hits_by_full_text = self.vector.search_by_full_text(query=get_example_text()) - assert len(hits_by_full_text) == 0 - - def text_exists(self): - exist = self.vector.text_exists(self.example_doc_id) - assert exist == True - def get_ids_by_metadata_field(self): ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id) - assert len(ids) == 0 - - -@pytest.fixture -def setup_mock_oceanbase_client(): - with patch("core.rag.datasource.vdb.oceanbase.oceanbase_vector.ObVecClient", new_callable=MagicMock) as mock_client: - yield mock_client - - -@pytest.fixture -def setup_mock_oceanbase_vector(oceanbase_vector): - with patch.object(oceanbase_vector, "_client"): - yield oceanbase_vector + assert len(ids) == 1 def test_oceanbase_vector( setup_mock_redis, - setup_mock_oceanbase_client, - setup_mock_oceanbase_vector, oceanbase_vector, ): OceanBaseVectorTest(oceanbase_vector).run_all_tests() diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index 648b82c91..1462957a9 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -435,7 +435,7 @@ services: # OceanBase vector database oceanbase: - image: oceanbase/oceanbase-ce:4.3.5.1-101000042025031818 + image: oceanbase/oceanbase-ce:4.3.5-lts container_name: oceanbase profiles: - oceanbase @@ -450,9 +450,7 @@ services: OB_TENANT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} OB_SERVER_IP: 127.0.0.1 - MODE: MINI - ports: - - "${OCEANBASE_VECTOR_PORT:-2881}:2881" + MODE: mini # Oracle vector database oracle: diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index ec910f625..1f6601734 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -942,7 +942,7 @@ services: # OceanBase vector database oceanbase: - image: oceanbase/oceanbase-ce:4.3.5.1-101000042025031818 + image: oceanbase/oceanbase-ce:4.3.5-lts container_name: oceanbase profiles: - oceanbase @@ -957,9 +957,7 @@ services: OB_TENANT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} OB_SERVER_IP: 127.0.0.1 - MODE: MINI - ports: - - "${OCEANBASE_VECTOR_PORT:-2881}:2881" + MODE: mini # Oracle vector database oracle: