feat: add upstash as a new vector database provider (#9644)
This commit is contained in:
75
api/tests/integration_tests/vdb/__mock/upstashvectordb.py
Normal file
75
api/tests/integration_tests/vdb/__mock/upstashvectordb.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
from upstash_vector import Index
|
||||
|
||||
|
||||
# Mocking the Index class from upstash_vector
|
||||
class MockIndex:
    """In-memory stand-in for ``upstash_vector.Index`` used by the tests.

    Vectors are kept in a plain list on the instance and no network calls
    are made.

    NOTE: the fixture in this module patches these methods one by one onto
    the real ``Index`` class, so at runtime ``self`` may be an ``Index``
    instance. Each method must therefore rely only on the attributes set in
    ``__init__`` and never on other ``MockIndex`` helpers.
    """

    def __init__(self, url="", token=""):
        """Store the connection settings and start with an empty store."""
        self.url = url
        self.token = token
        self.vectors = []

    def upsert(self, vectors):
        """Append ``vectors`` to the store and return a success payload.

        Every vector object gets a fixed ``score`` of 0.5 so that results
        returned by ``query`` carry a score attribute.
        """
        for vector in vectors:
            vector.score = 0.5
            self.vectors.append(vector)
        return {"code": 0, "msg": "operation success", "affectedCount": len(vectors)}

    def fetch(self, ids):
        """Return the stored vectors whose ``id`` is among ``ids``.

        ``ids`` may be a single id string or a collection of ids. A lone
        string is wrapped in a list first; otherwise ``in`` would perform a
        substring test against it.
        """
        wanted = [ids] if isinstance(ids, str) else ids
        return [vector for vector in self.vectors if vector.id in wanted]

    def delete(self, ids):
        """Drop the stored vectors whose ``id`` is among ``ids``.

        ``ids`` may be a single id string or a collection of ids (a lone
        string is wrapped to avoid substring matching).
        """
        unwanted = [ids] if isinstance(ids, str) else ids
        self.vectors = [vector for vector in self.vectors if vector.id not in unwanted]
        return {"code": 0, "msg": "Success"}

    def query(
        self,
        vector: Optional[list[float]] = None,
        top_k: int = 10,
        include_vectors: bool = False,
        include_metadata: bool = False,
        filter: str = "",
        data: Optional[str] = None,
        namespace: str = "",
        include_data: bool = False,
    ):
        """Return up to ``top_k`` stored vectors in insertion order.

        This is a simple mock: no similarity is computed, and the query
        vector, filter and every ``include_*`` / ``data`` / ``namespace``
        option are accepted only for signature compatibility and ignored.
        """
        # Slicing yields a new list, so callers cannot mutate the store.
        return self.vectors[:top_k]

    def reset(self):
        """Remove every stored vector."""
        self.vectors = []

    def info(self):
        """Return index info exposing a fixed ``dimension`` of 1024."""
        return AttrDict({"dimension": 1024})
|
||||
|
||||
|
||||
class AttrDict(dict):
    """Dictionary whose keys can also be read as attributes.

    Reading an attribute that is not a key yields ``None`` instead of
    raising ``AttributeError``.
    """

    def __getattr__(self, item):
        # Only invoked when normal attribute lookup fails, so real dict
        # methods such as ``get`` or ``items`` are never shadowed.
        try:
            return self[item]
        except KeyError:
            return None
|
||||
|
||||
|
||||
# Mocking is enabled only when the MOCK_SWITCH environment variable equals
# "true" (compared case-insensitively); when it is unset the default is off.
MOCK = os.getenv("MOCK_SWITCH", "false").lower() == "true"
|
||||
|
||||
|
||||
@pytest.fixture
def setup_upstashvector_mock(request, monkeypatch: MonkeyPatch):
    """Swap the methods of ``upstash_vector.Index`` for their ``MockIndex`` versions.

    Patching happens only when ``MOCK`` is true; otherwise the real client
    is left untouched. The patches are undone after the test has run.
    """
    patched_methods = ("__init__", "upsert", "fetch", "delete", "query", "reset", "info")
    if MOCK:
        for method_name in patched_methods:
            monkeypatch.setattr(Index, method_name, getattr(MockIndex, method_name))

    yield

    if MOCK:
        monkeypatch.undo()
|
0
api/tests/integration_tests/vdb/upstash/__init__.py
Normal file
0
api/tests/integration_tests/vdb/upstash/__init__.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from core.rag.datasource.vdb.upstash.upstash_vector import UpstashVector, UpstashVectorConfig
|
||||
from core.rag.models.document import Document
|
||||
from tests.integration_tests.vdb.__mock.upstashvectordb import setup_upstashvector_mock
|
||||
from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest
|
||||
|
||||
|
||||
def get_example_text() -> str:
    """Return the fixed text used as page content for example documents."""
    example_text = "test_text"
    return example_text
|
||||
|
||||
|
||||
def get_example_document(doc_id: str) -> Document:
    """Build an example ``Document`` whose metadata ids all equal ``doc_id``.

    The page content is the example text; ``doc_id``, ``doc_hash``,
    ``document_id`` and ``dataset_id`` are all set to the given ``doc_id``.
    """
    metadata_keys = ("doc_id", "doc_hash", "document_id", "dataset_id")
    metadata = dict.fromkeys(metadata_keys, doc_id)
    return Document(page_content=get_example_text(), metadata=metadata)
|
||||
|
||||
|
||||
class UpstashVectorTest(AbstractVectorTest):
    """Integration scenario for the Upstash vector store backend."""

    def __init__(self):
        """Create the ``UpstashVector`` under test and a matching example embedding."""
        super().__init__()
        upstash_config = UpstashVectorConfig(
            url="your-server-url",
            token="your-access-token",
        )
        self.vector = UpstashVector(collection_name="test_collection", config=upstash_config)
        # The embedding length follows whatever dimension the index reports.
        dimension = self.vector._get_index_dimension()
        self.example_embedding = [1.001 * position for position in range(dimension)]

    def add_texts(self) -> list[str]:
        """Add a batch of freshly generated documents and return their ``doc_id`` values."""
        batch_size = 1
        documents = []
        for _ in range(batch_size):
            documents.append(get_example_document(doc_id=str(uuid.uuid4())))
        # Every document shares the same example embedding object.
        embeddings = [self.example_embedding for _ in documents]
        self.vector.add_texts(documents=documents, embeddings=embeddings)
        return [document.metadata["doc_id"] for document in documents]

    def get_ids_by_metadata_field(self):
        """Assert that a lookup by ``document_id`` finds at least one id.

        ``self.example_doc_id`` is not set in this class; presumably it
        comes from ``AbstractVectorTest`` -- confirm against the base class.
        """
        print("doc_id", self.example_doc_id)
        found_ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
        assert len(found_ids) > 0

    def run_all_tests(self):
        """Run the whole scenario in order: create, search, look up, add, delete."""
        self.create_vector()
        # NOTE(review): the pause presumably lets the index settle before
        # it is queried -- confirm.
        time.sleep(1)
        self.search_by_vector()
        self.text_exists()
        self.get_ids_by_metadata_field()
        added_doc_ids = self.add_texts()
        ids_to_delete = [*added_doc_ids, self.example_doc_id]
        self.delete_by_ids(ids_to_delete)
        self.delete_vector()
|
||||
|
||||
|
||||
def test_upstash_vector(setup_upstashvector_mock):
    """Run the full Upstash vector scenario with the mock fixture requested."""
    scenario = UpstashVectorTest()
    scenario.run_all_tests()
|
Reference in New Issue
Block a user