diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index d6dfe967d..8efe105bb 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -4,8 +4,8 @@ import math from typing import Any from pydantic import BaseModel, model_validator -from pyobvector import VECTOR, ObVecClient # type: ignore -from sqlalchemy import JSON, Column, String, func +from pyobvector import VECTOR, FtsIndexParam, FtsParser, ObVecClient, l2_distance # type: ignore +from sqlalchemy import JSON, Column, String from sqlalchemy.dialects.mysql import LONGTEXT from configs import dify_config @@ -119,14 +119,21 @@ class OceanBaseVector(BaseVector): ) try: if self._hybrid_search_enabled: - self._client.perform_raw_text_sql(f"""ALTER TABLE {self._collection_name} - ADD FULLTEXT INDEX fulltext_index_for_col_text (text) WITH PARSER ik""") + self._client.create_fts_idx_with_fts_index_param( + table_name=self._collection_name, + fts_idx_param=FtsIndexParam( + index_name="fulltext_index_for_col_text", + field_names=["text"], + parser_type=FtsParser.IK, + ), + ) except Exception as e: raise Exception( "Failed to add fulltext index to the target table, your OceanBase version must be 4.3.5.1 or above " + "to support fulltext index and vector index in the same table", e, ) + self._client.refresh_metadata([self._collection_name]) redis_client.set(collection_exist_cache_key, 1, ex=3600) def _check_hybrid_search_support(self) -> bool: @@ -252,7 +259,7 @@ class OceanBaseVector(BaseVector): vec_column_name="vector", vec_data=query_vector, topk=topk, - distance_func=func.l2_distance, + distance_func=l2_distance, output_column_names=["text", "metadata"], with_dist=True, where_clause=_where_clause, diff --git a/api/pyproject.toml b/api/pyproject.toml index 61a725a83..ce642aa9c 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -205,7 +205,7 @@ vdb = [ "pgvector==0.2.5", "pymilvus~=2.5.0", "pymochow==1.3.1", - "pyobvector~=0.1.6", + "pyobvector~=0.2.15", "qdrant-client==1.9.0", "tablestore==6.2.0", "tcvectordb~=1.6.4", diff --git a/api/uv.lock b/api/uv.lock index cecce2bc4..40091c297 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1602,7 +1602,7 @@ vdb = [ { name = "pgvector", specifier = "==0.2.5" }, { name = "pymilvus", specifier = "~=2.5.0" }, { name = "pymochow", specifier = "==1.3.1" }, - { name = "pyobvector", specifier = "~=0.1.6" }, + { name = "pyobvector", specifier = "~=0.2.15" }, { name = "qdrant-client", specifier = "==1.9.0" }, { name = "tablestore", specifier = "==6.2.0" }, { name = "tcvectordb", specifier = "~=1.6.4" }, @@ -4569,17 +4569,19 @@ wheels = [ [[package]] name = "pyobvector" -version = "0.1.14" +version = "0.2.15" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiomysql" }, { name = "numpy" }, + { name = "pydantic" }, { name = "pymysql" }, { name = "sqlalchemy" }, + { name = "sqlglot" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dc/59/7d762061808948dd6aad165a000b34e22163dc83fb5014184eeacc0fabe5/pyobvector-0.1.14.tar.gz", hash = "sha256:4f85cdd63064d040e94c0a96099a0cd5cda18ce625865382e89429f28422fc02", size = 26780, upload-time = "2024-11-20T11:46:18.017Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/7d/3f3aac6acf1fdd1782042d6eecd48efaa2ee355af0dbb61e93292d629391/pyobvector-0.2.15.tar.gz", hash = "sha256:5de258c1e952c88b385b5661e130c1cf8262c498c1f8a4a348a35962d379fce4", size = 39611, upload-time = "2025-08-18T02:49:26.683Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/68/ecb21b74c974e7be7f9034e205d08db62d614ff5c221581ae96d37ef853e/pyobvector-0.1.14-py3-none-any.whl", hash = "sha256:828e0bec49a177355b70c7a1270af3b0bf5239200ee0d096e4165b267eeff97c", size = 35526, upload-time = "2024-11-20T11:46:16.809Z" }, + { url = "https://files.pythonhosted.org/packages/5f/1f/a62754ba9b8a02c038d2a96cb641b71d3809f34d2ba4f921fecd7840d7fb/pyobvector-0.2.15-py3-none-any.whl", hash = "sha256:feeefe849ee5400e72a9a4d3844e425a58a99053dd02abe06884206923065ebb", size = 52680, upload-time = "2025-08-18T02:49:25.452Z" }, ] [[package]] @@ -5432,6 +5434,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, ] +[[package]] +name = "sqlglot" +version = "26.33.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/25/9d/fcd59b4612d5ad1e2257c67c478107f073b19e1097d3bfde2fb517884416/sqlglot-26.33.0.tar.gz", hash = "sha256:2817278779fa51d6def43aa0d70690b93a25c83eb18ec97130fdaf707abc0d73", size = 5353340, upload-time = "2025-07-01T13:09:06.311Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/8d/f1d9cb5b18e06aa45689fbeaaea6ebab66d5f01d1e65029a8f7657c06be5/sqlglot-26.33.0-py3-none-any.whl", hash = "sha256:031cee20c0c796a83d26d079a47fdce667604df430598c7eabfa4e4dfd147033", size = 477610, upload-time = "2025-07-01T13:09:03.926Z" }, +] + [[package]] name = "sseclient-py" version = "1.8.0"