feat: add gpustack model provider (#10158)

2024-11-01 17:23:30 +08:00
parent 3c85136279
commit 76b0328eb1
17 changed files with 705 additions and 1 deletions
--- a/api/tests/integration_tests/model_runtime/gpustack/__init__.py
+++ b/api/tests/integration_tests/model_runtime/gpustack/__init__.py
--- a/api/tests/integration_tests/model_runtime/gpustack/test_embedding.py
+++ b/api/tests/integration_tests/model_runtime/gpustack/test_embedding.py
@@ -0,0 +1,49 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.gpustack.text_embedding.text_embedding import (
+    GPUStackTextEmbeddingModel,
+)
+
+
+def test_validate_credentials():
+    """Check that bogus credentials are rejected and the environment-provided ones are not."""
+    embedding_model = GPUStackTextEmbeddingModel()
+
+    # A malformed endpoint and key must surface as a validation failure.
+    bad_credentials = {
+        "endpoint_url": "invalid_url",
+        "api_key": "invalid_api_key",
+    }
+    with pytest.raises(CredentialsValidateFailedError):
+        embedding_model.validate_credentials(model="bge-m3", credentials=bad_credentials)
+
+    # The same call with GPUSTACK_SERVER_URL / GPUSTACK_API_KEY is expected not to raise.
+    env_credentials = {
+        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+        "api_key": os.environ.get("GPUSTACK_API_KEY"),
+    }
+    embedding_model.validate_credentials(model="bge-m3", credentials=env_credentials)
+
+
+def test_invoke_model():
+    """Embed two short texts with bge-m3 and check result type, embedding count and token usage."""
+    embedding_model = GPUStackTextEmbeddingModel()
+
+    env_credentials = {
+        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+        "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        "context_size": 8192,
+    }
+    embedding_result = embedding_model.invoke(
+        model="bge-m3",
+        credentials=env_credentials,
+        texts=["hello", "world"],
+        user="abc-123",
+    )
+
+    assert isinstance(embedding_result, TextEmbeddingResult)
+    # One embedding is expected per input text.
+    assert len(embedding_result.embeddings) == 2
+    assert embedding_result.usage.total_tokens == 7
--- a/api/tests/integration_tests/model_runtime/gpustack/test_llm.py
+++ b/api/tests/integration_tests/model_runtime/gpustack/test_llm.py
@@ -0,0 +1,162 @@
+import os
+from collections.abc import Generator
+
+import pytest
+
+from core.model_runtime.entities.llm_entities import (
+    LLMResult,
+    LLMResultChunk,
+    LLMResultChunkDelta,
+)
+from core.model_runtime.entities.message_entities import (
+    AssistantPromptMessage,
+    PromptMessageTool,
+    SystemPromptMessage,
+    UserPromptMessage,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.gpustack.llm.llm import GPUStackLanguageModel
+
+
+def test_validate_credentials_for_chat_model():
+    """Check chat-mode credential validation with invalid values, then with environment values."""
+    llm = GPUStackLanguageModel()
+    model_name = "llama-3.2-1b-instruct"
+
+    # Invalid endpoint/key pair: validation has to raise.
+    bad_credentials = {
+        "endpoint_url": "invalid_url",
+        "api_key": "invalid_api_key",
+        "mode": "chat",
+    }
+    with pytest.raises(CredentialsValidateFailedError):
+        llm.validate_credentials(model=model_name, credentials=bad_credentials)
+
+    # Credentials read from the environment: validation is expected to pass silently.
+    env_credentials = {
+        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+        "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        "mode": "chat",
+    }
+    llm.validate_credentials(model=model_name, credentials=env_credentials)
+
+
+def test_invoke_completion_model():
+    """Run a non-streaming invocation in completion mode; expect non-empty text and token usage."""
+    llm = GPUStackLanguageModel()
+
+    completion_credentials = {
+        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+        "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        "mode": "completion",
+    }
+    sampling_parameters = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}
+
+    result = llm.invoke(
+        model="llama-3.2-1b-instruct",
+        credentials=completion_credentials,
+        prompt_messages=[UserPromptMessage(content="ping")],
+        model_parameters=sampling_parameters,
+        stop=[],
+        user="abc-123",
+        stream=False,
+    )
+
+    assert isinstance(result, LLMResult)
+    assert len(result.message.content) > 0
+    assert result.usage.total_tokens > 0
+
+
+def test_invoke_chat_model():
+    """Run a non-streaming invocation in chat mode; expect non-empty text and token usage."""
+    llm = GPUStackLanguageModel()
+
+    chat_credentials = {
+        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+        "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        "mode": "chat",
+    }
+    sampling_parameters = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}
+
+    result = llm.invoke(
+        model="llama-3.2-1b-instruct",
+        credentials=chat_credentials,
+        prompt_messages=[UserPromptMessage(content="ping")],
+        model_parameters=sampling_parameters,
+        stop=[],
+        user="abc-123",
+        stream=False,
+    )
+
+    assert isinstance(result, LLMResult)
+    assert len(result.message.content) > 0
+    assert result.usage.total_tokens > 0
+
+
+def test_invoke_stream_chat_model():
+    """Stream a chat invocation and check the type and content of every yielded chunk."""
+    llm = GPUStackLanguageModel()
+
+    chat_credentials = {
+        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+        "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        "mode": "chat",
+    }
+    sampling_parameters = {"temperature": 0.7, "top_p": 1.0, "max_tokens": 10}
+
+    stream = llm.invoke(
+        model="llama-3.2-1b-instruct",
+        credentials=chat_credentials,
+        prompt_messages=[UserPromptMessage(content="Hello World!")],
+        model_parameters=sampling_parameters,
+        stop=["you"],
+        stream=True,
+        user="abc-123",
+    )
+
+    assert isinstance(stream, Generator)
+    for piece in stream:
+        assert isinstance(piece, LLMResultChunk)
+        delta = piece.delta
+        assert isinstance(delta, LLMResultChunkDelta)
+        assert isinstance(delta.message, AssistantPromptMessage)
+        # Only chunks without a finish reason are required to carry text.
+        if delta.finish_reason is None:
+            assert len(delta.message.content) > 0
+
+
+def test_get_num_tokens():
+    """Check token counts for a system+user prompt with one tool (80) and a lone user prompt (10)."""
+    llm = GPUStackLanguageModel()
+
+    def chat_credentials():
+        # Build a fresh dict per call so each invocation receives its own credentials object.
+        return {
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+            "mode": "chat",
+        }
+
+    weather_tool = PromptMessageTool(
+        name="get_current_weather",
+        description="Get the current weather in a given location",
+        parameters={
+            "type": "object",
+            "properties": {
+                "location": {
+                    "type": "string",
+                    "description": "The city and state e.g. San Francisco, CA",
+                },
+                "unit": {"type": "string", "enum": ["c", "f"]},
+            },
+            "required": ["location"],
+        },
+    )
+
+    # Case 1: system message, user message and a tool definition.
+    count_with_tool = llm.get_num_tokens(
+        model="????",
+        credentials=chat_credentials(),
+        prompt_messages=[
+            SystemPromptMessage(content="You are a helpful AI assistant."),
+            UserPromptMessage(content="Hello World!"),
+        ],
+        tools=[weather_tool],
+    )
+
+    assert isinstance(count_with_tool, int)
+    assert count_with_tool == 80
+
+    # Case 2: a single user message and no tools.
+    count_plain = llm.get_num_tokens(
+        model="????",
+        credentials=chat_credentials(),
+        prompt_messages=[UserPromptMessage(content="Hello World!")],
+    )
+
+    assert isinstance(count_plain, int)
+    assert count_plain == 10
--- a/api/tests/integration_tests/model_runtime/gpustack/test_rerank.py
+++ b/api/tests/integration_tests/model_runtime/gpustack/test_rerank.py
@@ -0,0 +1,107 @@
+import os
+
+import pytest
+
+from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.gpustack.rerank.rerank import (
+    GPUStackRerankModel,
+)
+
+
+def test_validate_credentials_for_rerank_model():
+    """Check rerank credential validation with invalid values, then with environment values."""
+    reranker = GPUStackRerankModel()
+    model_name = "bge-reranker-v2-m3"
+
+    # Invalid endpoint/key pair: validation has to raise.
+    bad_credentials = {
+        "endpoint_url": "invalid_url",
+        "api_key": "invalid_api_key",
+    }
+    with pytest.raises(CredentialsValidateFailedError):
+        reranker.validate_credentials(model=model_name, credentials=bad_credentials)
+
+    # Credentials read from the environment: validation is expected to pass silently.
+    env_credentials = {
+        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+        "api_key": os.environ.get("GPUSTACK_API_KEY"),
+    }
+    reranker.validate_credentials(model=model_name, credentials=env_credentials)
+
+
+def test_invoke_rerank_model():
+    """Rerank nine candidate documents against a query and expect exactly the top three back."""
+    reranker = GPUStackRerankModel()
+
+    env_credentials = {
+        "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+        "api_key": os.environ.get("GPUSTACK_API_KEY"),
+    }
+    candidate_docs = [
+        "Eco-friendly kitchenware for modern homes",
+        "Biodegradable cleaning supplies for eco-conscious consumers",
+        "Organic cotton baby clothes for sensitive skin",
+        "Natural organic skincare range for sensitive skin",
+        "Tech gadgets for smart homes: 2024 edition",
+        "Sustainable gardening tools and compost solutions",
+        "Sensitive skin-friendly facial cleansers and toners",
+        "Organic food wraps and storage solutions",
+        "Yoga mats made from recycled materials",
+    ]
+
+    reranked = reranker.invoke(
+        model="bge-reranker-v2-m3",
+        credentials=env_credentials,
+        query="Organic skincare products for sensitive skin",
+        docs=candidate_docs,
+        top_n=3,
+        score_threshold=-0.75,
+        user="abc-123",
+    )
+
+    assert isinstance(reranked, RerankResult)
+    assert len(reranked.docs) == 3
+
+
+def test__invoke():
+    """Call ``_invoke`` directly: an empty document list yields no docs, nine candidates yield three."""
+    reranker = GPUStackRerankModel()
+    search_query = "Organic skincare products for sensitive skin"
+
+    # Test case 1: Empty docs
+    empty_result = reranker._invoke(
+        model="bge-reranker-v2-m3",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        },
+        query=search_query,
+        docs=[],
+        top_n=3,
+        score_threshold=0.75,
+        user="abc-123",
+    )
+    assert isinstance(empty_result, RerankResult)
+    assert len(empty_result.docs) == 0
+
+    # Test case 2: Expected docs
+    candidate_docs = [
+        "Eco-friendly kitchenware for modern homes",
+        "Biodegradable cleaning supplies for eco-conscious consumers",
+        "Organic cotton baby clothes for sensitive skin",
+        "Natural organic skincare range for sensitive skin",
+        "Tech gadgets for smart homes: 2024 edition",
+        "Sustainable gardening tools and compost solutions",
+        "Sensitive skin-friendly facial cleansers and toners",
+        "Organic food wraps and storage solutions",
+        "Yoga mats made from recycled materials",
+    ]
+    ranked_result = reranker._invoke(
+        model="bge-reranker-v2-m3",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        },
+        query=search_query,
+        docs=candidate_docs,
+        top_n=3,
+        score_threshold=-0.75,
+        user="abc-123",
+    )
+    assert isinstance(ranked_result, RerankResult)
+    assert len(ranked_result.docs) == 3
+    for ranked_doc in ranked_result.docs:
+        assert isinstance(ranked_doc, RerankDocument)