Add nomic embedding model provider (#8640)

This commit is contained in:
ice yao
2024-09-23 19:57:21 +08:00
committed by GitHub
parent 4f69adc8ab
commit d7aada38a1
17 changed files with 506 additions and 2 deletions

View File

@@ -0,0 +1,59 @@
import os
from collections.abc import Callable
from typing import Any, Literal, Union
import pytest
# import monkeypatch
from _pytest.monkeypatch import MonkeyPatch
from nomic import embed
def create_embedding(texts: list[str], model: str, **kwargs: Any) -> dict:
    """
    Build a canned embedding response for the given texts.

    :param texts: input texts; one vector is produced per entry
    :param model: model name, echoed back under the ``"model"`` key
    :param kwargs: accepted for call compatibility and ignored
    :return: dict with ``embeddings``, ``usage``, ``model`` and ``inference_mode`` keys
    """
    count = len(texts)
    # Every vector is the same constant repeated 768 times; each row is its own list.
    vectors = [[0.123456] * 768 for _ in range(count)]
    return {
        "embeddings": vectors,
        # Token counts are reported as the number of input texts.
        "usage": {"prompt_tokens": count, "total_tokens": count},
        "model": model,
        "inference_mode": "remote",
    }
def mock_nomic(
    monkeypatch: MonkeyPatch,
    methods: list[Literal["text_embedding"]],
) -> Callable[[], None]:
    """
    Patch the nomic ``embed`` module for the requested methods.

    :param monkeypatch: pytest monkeypatch fixture
    :param methods: names of the methods to mock; ``"text_embedding"`` replaces
        ``embed.text`` with ``create_embedding``
    :return: unpatch function that reverts the patches via ``monkeypatch.undo()``
    """
    wants_text_embedding = "text_embedding" in methods
    if wants_text_embedding:
        monkeypatch.setattr(embed, "text", create_embedding)

    def _undo_patches() -> None:
        monkeypatch.undo()

    return _undo_patches
# Mocking is enabled only when MOCK_SWITCH equals "true" (case-insensitive); unset means off.
MOCK = os.environ.get("MOCK_SWITCH", "false").lower() == "true"
@pytest.fixture
def setup_nomic_mock(request, monkeypatch):
    """
    Mock nomic for the duration of a test when ``MOCK`` is enabled.

    The methods to mock are taken from ``request.param`` (set through indirect
    parametrization); when no param is present, an empty list is used.
    """
    methods = getattr(request, "param", [])

    if not MOCK:
        # Mocking disabled: run the test without patching anything.
        yield
        return

    unpatch = mock_nomic(monkeypatch, methods=methods)
    yield
    unpatch()

View File

@@ -0,0 +1,62 @@
import os
import pytest
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel
from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock
@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
def test_validate_credentials(setup_nomic_mock):
    """Reject an invalid API key, then validate the key read from NOMIC_API_KEY without error."""
    model_name = "nomic-embed-text-v1.5"
    embedding_model = NomicTextEmbeddingModel()

    bad_credentials = {"nomic_api_key": "invalid_key"}
    with pytest.raises(CredentialsValidateFailedError):
        embedding_model.validate_credentials(model=model_name, credentials=bad_credentials)

    # This call must complete without raising.
    env_credentials = {"nomic_api_key": os.environ.get("NOMIC_API_KEY")}
    embedding_model.validate_credentials(model=model_name, credentials=env_credentials)
@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
def test_invoke_model(setup_nomic_mock):
    """Invoke the embedding model on two texts and check the result's type, model, size and usage."""
    model_name = "nomic-embed-text-v1.5"
    inputs = ["hello", "world"]
    credentials = {"nomic_api_key": os.environ.get("NOMIC_API_KEY")}

    embedding_model = NomicTextEmbeddingModel()
    response = embedding_model.invoke(
        model=model_name,
        credentials=credentials,
        texts=inputs,
        user="foo",
    )

    assert isinstance(response, TextEmbeddingResult)
    assert response.model == "nomic-embed-text-v1.5"
    # One embedding per input text; total_tokens is expected to be 2 for the two inputs.
    assert len(response.embeddings) == 2
    assert response.usage.total_tokens == 2
@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
def test_get_num_tokens(setup_nomic_mock):
    """Check that ``get_num_tokens`` reports 2 for the texts "hello" and "world"."""
    credentials = {"nomic_api_key": os.environ.get("NOMIC_API_KEY")}
    inputs = ["hello", "world"]

    embedding_model = NomicTextEmbeddingModel()
    token_count = embedding_model.get_num_tokens(
        model="nomic-embed-text-v1.5",
        credentials=credentials,
        texts=inputs,
    )

    assert token_count == 2

View File

@@ -0,0 +1,22 @@
import os
import pytest
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.nomic.nomic import NomicAtlasProvider
from core.model_runtime.model_providers.nomic.text_embedding.text_embedding import NomicTextEmbeddingModel
from tests.integration_tests.model_runtime.__mock.nomic_embeddings import setup_nomic_mock
@pytest.mark.parametrize("setup_nomic_mock", [["text_embedding"]], indirect=True)
def test_validate_provider_credentials(setup_nomic_mock):
    """Reject empty provider credentials, then validate the key read from NOMIC_API_KEY without error."""
    nomic_provider = NomicAtlasProvider()

    empty_credentials = {}
    with pytest.raises(CredentialsValidateFailedError):
        nomic_provider.validate_provider_credentials(credentials=empty_credentials)

    # This call must complete without raising.
    env_credentials = {"nomic_api_key": os.environ.get("NOMIC_API_KEY")}
    nomic_provider.validate_provider_credentials(credentials=env_credentials)