feat: add gpustack model provider (#10158)

This commit is contained in:
Lawrence Li
2024-11-01 17:23:30 +08:00
committed by GitHub
parent 3c85136279
commit 76b0328eb1
17 changed files with 705 additions and 1 deletions

View File

@@ -0,0 +1,49 @@
import os
import pytest
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.gpustack.text_embedding.text_embedding import (
GPUStackTextEmbeddingModel,
)
def test_validate_credentials():
    """Check credential validation for the GPUStack text-embedding model.

    Placeholder credentials must raise ``CredentialsValidateFailedError``;
    credentials read from the ``GPUSTACK_SERVER_URL`` and ``GPUSTACK_API_KEY``
    environment variables must validate without raising.
    """
    embedding_model = GPUStackTextEmbeddingModel()

    bogus_credentials = {
        "endpoint_url": "invalid_url",
        "api_key": "invalid_api_key",
    }
    with pytest.raises(CredentialsValidateFailedError):
        embedding_model.validate_credentials(
            model="bge-m3",
            credentials=bogus_credentials,
        )

    # Kept outside the ``raises`` block: this call is expected to succeed.
    env_credentials = {
        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
        "api_key": os.environ.get("GPUSTACK_API_KEY"),
    }
    embedding_model.validate_credentials(
        model="bge-m3",
        credentials=env_credentials,
    )
def test_invoke_model():
    """Embed two short texts with ``bge-m3`` and check the result.

    Expects a ``TextEmbeddingResult`` holding two embeddings and a reported
    usage of exactly 7 total tokens.
    """
    embedding_model = GPUStackTextEmbeddingModel()

    env_credentials = {
        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
        "api_key": os.environ.get("GPUSTACK_API_KEY"),
        "context_size": 8192,
    }
    embedding_result = embedding_model.invoke(
        model="bge-m3",
        credentials=env_credentials,
        texts=["hello", "world"],
        user="abc-123",
    )

    assert isinstance(embedding_result, TextEmbeddingResult)
    assert len(embedding_result.embeddings) == 2
    assert embedding_result.usage.total_tokens == 7

View File

@@ -0,0 +1,162 @@
import os
from collections.abc import Generator
import pytest
from core.model_runtime.entities.llm_entities import (
LLMResult,
LLMResultChunk,
LLMResultChunkDelta,
)
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessageTool,
SystemPromptMessage,
UserPromptMessage,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.gpustack.llm.llm import GPUStackLanguageModel
def test_validate_credentials_for_chat_model():
    """Check credential validation for the GPUStack LLM in chat mode.

    Placeholder credentials must raise ``CredentialsValidateFailedError``;
    credentials read from the ``GPUSTACK_SERVER_URL`` and ``GPUSTACK_API_KEY``
    environment variables must validate without raising.
    """
    llm = GPUStackLanguageModel()

    bogus_credentials = {
        "endpoint_url": "invalid_url",
        "api_key": "invalid_api_key",
        "mode": "chat",
    }
    with pytest.raises(CredentialsValidateFailedError):
        llm.validate_credentials(
            model="llama-3.2-1b-instruct",
            credentials=bogus_credentials,
        )

    # Kept outside the ``raises`` block: this call is expected to succeed.
    env_credentials = {
        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
        "api_key": os.environ.get("GPUSTACK_API_KEY"),
        "mode": "chat",
    }
    llm.validate_credentials(
        model="llama-3.2-1b-instruct",
        credentials=env_credentials,
    )
def test_invoke_completion_model():
    """Run a blocking (non-streaming) invocation in ``completion`` mode.

    Expects an ``LLMResult`` whose message content is non-empty and whose
    usage reports a positive total token count.
    """
    llm = GPUStackLanguageModel()

    env_credentials = {
        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
        "api_key": os.environ.get("GPUSTACK_API_KEY"),
        "mode": "completion",
    }
    sampling_parameters = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}

    result = llm.invoke(
        model="llama-3.2-1b-instruct",
        credentials=env_credentials,
        prompt_messages=[UserPromptMessage(content="ping")],
        model_parameters=sampling_parameters,
        stop=[],
        user="abc-123",
        stream=False,
    )

    assert isinstance(result, LLMResult)
    assert len(result.message.content) > 0
    assert result.usage.total_tokens > 0
def test_invoke_chat_model():
    """Run a blocking (non-streaming) invocation in ``chat`` mode.

    Expects an ``LLMResult`` whose message content is non-empty and whose
    usage reports a positive total token count.
    """
    llm = GPUStackLanguageModel()

    env_credentials = {
        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
        "api_key": os.environ.get("GPUSTACK_API_KEY"),
        "mode": "chat",
    }
    sampling_parameters = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}

    result = llm.invoke(
        model="llama-3.2-1b-instruct",
        credentials=env_credentials,
        prompt_messages=[UserPromptMessage(content="ping")],
        model_parameters=sampling_parameters,
        stop=[],
        user="abc-123",
        stream=False,
    )

    assert isinstance(result, LLMResult)
    assert len(result.message.content) > 0
    assert result.usage.total_tokens > 0
def test_invoke_stream_chat_model():
    """Run a streaming invocation in ``chat`` mode and inspect every chunk.

    The call must return a generator. Each yielded item must be an
    ``LLMResultChunk`` carrying an ``LLMResultChunkDelta`` with an
    ``AssistantPromptMessage``. Chunks without a ``finish_reason`` must have
    non-empty content.
    """
    llm = GPUStackLanguageModel()

    env_credentials = {
        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
        "api_key": os.environ.get("GPUSTACK_API_KEY"),
        "mode": "chat",
    }
    sampling_parameters = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}

    stream = llm.invoke(
        model="llama-3.2-1b-instruct",
        credentials=env_credentials,
        prompt_messages=[UserPromptMessage(content="Hello World!")],
        model_parameters=sampling_parameters,
        stop=["you"],
        stream=True,
        user="abc-123",
    )

    assert isinstance(stream, Generator)

    for piece in stream:
        assert isinstance(piece, LLMResultChunk)
        assert isinstance(piece.delta, LLMResultChunkDelta)
        assert isinstance(piece.delta.message, AssistantPromptMessage)
        # Content is only checked on chunks that carry no finish reason.
        if piece.delta.finish_reason is None:
            assert len(piece.delta.message.content) > 0
def test_get_num_tokens():
    """Check token counting for chat prompts, with and without a tool.

    A system + user prompt passed together with one tool definition is
    expected to count 80 tokens; a lone user prompt is expected to count 10.
    """
    llm = GPUStackLanguageModel()

    # Case 1: system + user messages plus a single tool definition.
    weather_tool = PromptMessageTool(
        name="get_current_weather",
        description="Get the current weather in a given location",
        parameters={
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state e.g. San Francisco, CA",
                },
                "unit": {"type": "string", "enum": ["c", "f"]},
            },
            "required": ["location"],
        },
    )
    conversation = [
        SystemPromptMessage(
            content="You are a helpful AI assistant.",
        ),
        UserPromptMessage(content="Hello World!"),
    ]
    token_count = llm.get_num_tokens(
        model="????",
        credentials={
            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
            "api_key": os.environ.get("GPUSTACK_API_KEY"),
            "mode": "chat",
        },
        prompt_messages=conversation,
        tools=[weather_tool],
    )
    assert isinstance(token_count, int)
    assert token_count == 80

    # Case 2: a single user message and no tools.
    token_count = llm.get_num_tokens(
        model="????",
        credentials={
            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
            "api_key": os.environ.get("GPUSTACK_API_KEY"),
            "mode": "chat",
        },
        prompt_messages=[UserPromptMessage(content="Hello World!")],
    )
    assert isinstance(token_count, int)
    assert token_count == 10

View File

@@ -0,0 +1,107 @@
import os
import pytest
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.gpustack.rerank.rerank import (
GPUStackRerankModel,
)
def test_validate_credentials_for_rerank_model():
    """Check credential validation for the GPUStack rerank model.

    Placeholder credentials must raise ``CredentialsValidateFailedError``;
    credentials read from the ``GPUSTACK_SERVER_URL`` and ``GPUSTACK_API_KEY``
    environment variables must validate without raising.
    """
    reranker = GPUStackRerankModel()

    bogus_credentials = {
        "endpoint_url": "invalid_url",
        "api_key": "invalid_api_key",
    }
    with pytest.raises(CredentialsValidateFailedError):
        reranker.validate_credentials(
            model="bge-reranker-v2-m3",
            credentials=bogus_credentials,
        )

    # Kept outside the ``raises`` block: this call is expected to succeed.
    env_credentials = {
        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
        "api_key": os.environ.get("GPUSTACK_API_KEY"),
    }
    reranker.validate_credentials(
        model="bge-reranker-v2-m3",
        credentials=env_credentials,
    )
def test_invoke_rerank_model():
    """Rerank nine candidate documents against a query via ``invoke``.

    With ``top_n=3`` and a score threshold of -0.75, the call is expected to
    return a ``RerankResult`` containing exactly three documents.
    """
    reranker = GPUStackRerankModel()

    env_credentials = {
        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
        "api_key": os.environ.get("GPUSTACK_API_KEY"),
    }
    candidate_docs = [
        "Eco-friendly kitchenware for modern homes",
        "Biodegradable cleaning supplies for eco-conscious consumers",
        "Organic cotton baby clothes for sensitive skin",
        "Natural organic skincare range for sensitive skin",
        "Tech gadgets for smart homes: 2024 edition",
        "Sustainable gardening tools and compost solutions",
        "Sensitive skin-friendly facial cleansers and toners",
        "Organic food wraps and storage solutions",
        "Yoga mats made from recycled materials",
    ]

    reranked = reranker.invoke(
        model="bge-reranker-v2-m3",
        credentials=env_credentials,
        query="Organic skincare products for sensitive skin",
        docs=candidate_docs,
        top_n=3,
        score_threshold=-0.75,
        user="abc-123",
    )

    assert isinstance(reranked, RerankResult)
    assert len(reranked.docs) == 3
def test__invoke():
    """Exercise ``GPUStackRerankModel._invoke`` directly.

    Covers two cases: an empty document list, which must give a
    ``RerankResult`` with no documents, and a nine-document list with
    ``top_n=3``, which must give three ``RerankDocument`` entries.
    """
    reranker = GPUStackRerankModel()
    search_query = "Organic skincare products for sensitive skin"

    # Test case 1: Empty docs
    empty_outcome = reranker._invoke(
        model="bge-reranker-v2-m3",
        credentials={
            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
            "api_key": os.environ.get("GPUSTACK_API_KEY"),
        },
        query=search_query,
        docs=[],
        top_n=3,
        score_threshold=0.75,
        user="abc-123",
    )
    assert isinstance(empty_outcome, RerankResult)
    assert len(empty_outcome.docs) == 0

    # Test case 2: Expected docs
    candidate_docs = [
        "Eco-friendly kitchenware for modern homes",
        "Biodegradable cleaning supplies for eco-conscious consumers",
        "Organic cotton baby clothes for sensitive skin",
        "Natural organic skincare range for sensitive skin",
        "Tech gadgets for smart homes: 2024 edition",
        "Sustainable gardening tools and compost solutions",
        "Sensitive skin-friendly facial cleansers and toners",
        "Organic food wraps and storage solutions",
        "Yoga mats made from recycled materials",
    ]
    ranked_outcome = reranker._invoke(
        model="bge-reranker-v2-m3",
        credentials={
            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
            "api_key": os.environ.get("GPUSTACK_API_KEY"),
        },
        query=search_query,
        docs=candidate_docs,
        top_n=3,
        score_threshold=-0.75,
        user="abc-123",
    )
    assert isinstance(ranked_outcome, RerankResult)
    assert len(ranked_outcome.docs) == 3
    for ranked_doc in ranked_outcome.docs:
        assert isinstance(ranked_doc, RerankDocument)