Model Runtime (#1858)

Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: Garfield Dai <dai.hai@foxmail.com> Co-authored-by: chenhe <guchenhe@gmail.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn>
2024-01-02 23:42:00 +08:00
parent e91dd28a76
commit d069c668f8
807 changed files with 171310 additions and 23806 deletions
--- a/api/tests/integration_tests/model_runtime/xinference/test_llm.py
+++ b/api/tests/integration_tests/model_runtime/xinference/test_llm.py
@@ -0,0 +1,392 @@
+import os
+import pytest
+
+from typing import Generator
+
+from core.model_runtime.entities.message_entities import AssistantPromptMessage, TextPromptMessageContent, UserPromptMessage, \
+    SystemPromptMessage, PromptMessageTool
+from core.model_runtime.entities.model_entities import AIModelEntity
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunkDelta, \
+    LLMResultChunk
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel
+
+"""FOR MOCK FIXTURES, DO NOT REMOVE"""
+from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
+from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock
+
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
+def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
+    """Validate chat-model credentials: two sets must be rejected, one must pass."""
+    llm = XinferenceAILargeLanguageModel()
+    server_url = os.environ.get('XINFERENCE_SERVER_URL')
+    chat_uid = os.environ.get('XINFERENCE_CHAT_MODEL_UID')
+
+    # Configured server, but the model uid is prefixed with 'www ': must be rejected.
+    with pytest.raises(CredentialsValidateFailedError):
+        llm.validate_credentials(
+            model='ChatGLM3',
+            credentials={
+                'server_url': server_url,
+                'model_uid': 'www ' + chat_uid,
+            },
+        )
+
+    # Empty server url and empty model uid: must be rejected as well.
+    with pytest.raises(CredentialsValidateFailedError):
+        llm.validate_credentials(
+            model='aaaaa',
+            credentials={'server_url': '', 'model_uid': ''},
+        )
+
+    # Values taken unchanged from the environment: the call must not raise.
+    llm.validate_credentials(
+        model='ChatGLM3',
+        credentials={'server_url': server_url, 'model_uid': chat_uid},
+    )
+
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
+def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
+    """Invoke the chat model with stream=False and check the returned LLMResult."""
+    llm = XinferenceAILargeLanguageModel()
+    credentials = {
+        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
+        'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID'),
+    }
+    conversation = [
+        SystemPromptMessage(content='You are a helpful AI assistant.'),
+        UserPromptMessage(content='Hello World!'),
+    ]
+
+    # Non-streaming call: the whole answer is asserted to arrive as one object.
+    result = llm.invoke(
+        model='ChatGLM3',
+        credentials=credentials,
+        prompt_messages=conversation,
+        model_parameters={
+            'temperature': 0.7,
+            'top_p': 1.0,
+        },
+        stop=['you'],
+        user="abc-123",
+        stream=False,
+    )
+
+    assert isinstance(result, LLMResult)
+    assert len(result.message.content) > 0
+    assert result.usage.total_tokens > 0
+
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
+def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
+    """Invoke the chat model with stream=True and check every yielded chunk."""
+    llm = XinferenceAILargeLanguageModel()
+    credentials = {
+        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
+        'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID'),
+    }
+    conversation = [
+        SystemPromptMessage(content='You are a helpful AI assistant.'),
+        UserPromptMessage(content='Hello World!'),
+    ]
+
+    # Streaming call: the return value is asserted to be a generator of chunks.
+    chunks = llm.invoke(
+        model='ChatGLM3',
+        credentials=credentials,
+        prompt_messages=conversation,
+        model_parameters={'temperature': 0.7, 'top_p': 1.0},
+        stop=['you'],
+        stream=True,
+        user="abc-123",
+    )
+
+    assert isinstance(chunks, Generator)
+    for piece in chunks:
+        assert isinstance(piece, LLMResultChunk)
+        delta = piece.delta
+        assert isinstance(delta, LLMResultChunkDelta)
+        assert isinstance(delta.message, AssistantPromptMessage)
+        # Text is only required while no finish reason has been reported.
+        if delta.finish_reason is None:
+            assert len(delta.message.content) > 0
+"""
+    Function calling in Xinference does not currently support stream mode
+"""
+# def test_invoke_stream_chat_model_with_functions():
+#     model = XinferenceAILargeLanguageModel()
+
+#     response = model.invoke(
+#         model='ChatGLM3-6b',
+#         credentials={
+#             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
+#             'model_type': 'text-generation',
+#             'model_name': 'ChatGLM3',
+#             'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
+#         },
+#         prompt_messages=[
+#             SystemPromptMessage(
+#                 content='你是一个天气机器人，可以通过调用函数来获取天气信息',
+#             ),
+#             UserPromptMessage(
+#                 content='波士顿天气如何？'
+#             )
+#         ],
+#         model_parameters={
+#             'temperature': 0,
+#             'top_p': 1.0,
+#         },
+#         stop=['you'],
+#         user='abc-123',
+#         stream=True,
+#         tools=[
+#             PromptMessageTool(
+#                 name='get_current_weather',
+#                 description='Get the current weather in a given location',
+#                 parameters={
+#                     "type": "object",
+#                     "properties": {
+#                         "location": {
+#                         "type": "string",
+#                             "description": "The city and state e.g. San Francisco, CA"
+#                         },
+#                         "unit": {
+#                             "type": "string",
+#                             "enum": ["celsius", "fahrenheit"]
+#                         }
+#                     },
+#                     "required": [
+#                         "location"
+#                     ]
+#                 }
+#             )
+#         ]
+#     )
+
+#     assert isinstance(response, Generator)
+    
+#     call: LLMResultChunk = None
+#     chunks = []
+
+#     for chunk in response:
+#         chunks.append(chunk)
+#         assert isinstance(chunk, LLMResultChunk)
+#         assert isinstance(chunk.delta, LLMResultChunkDelta)
+#         assert isinstance(chunk.delta.message, AssistantPromptMessage)
+#         assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
+
+#         if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0:
+#             call = chunk
+#             break
+
+#     assert call is not None
+#     assert call.delta.message.tool_calls[0].function.name == 'get_current_weather'
+
+# def test_invoke_chat_model_with_functions():
+#     model = XinferenceAILargeLanguageModel()
+
+#     response = model.invoke(
+#         model='ChatGLM3-6b',
+#         credentials={
+#             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
+#             'model_type': 'text-generation',
+#             'model_name': 'ChatGLM3',
+#             'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
+#         },
+#         prompt_messages=[
+#             UserPromptMessage(
+#                 content='What is the weather like in San Francisco?'
+#             )
+#         ],
+#         model_parameters={
+#             'temperature': 0.7,
+#             'top_p': 1.0,
+#         },
+#         stop=['you'],
+#         user='abc-123',
+#         stream=False,
+#         tools=[
+#             PromptMessageTool(
+#                 name='get_current_weather',
+#                 description='Get the current weather in a given location',
+#                 parameters={
+#                     "type": "object",
+#                     "properties": {
+#                         "location": {
+#                         "type": "string",
+#                             "description": "The city and state e.g. San Francisco, CA"
+#                         },
+#                         "unit": {
+#                             "type": "string",
+#                             "enum": [
+#                                 "c",
+#                                 "f"
+#                             ]
+#                         }
+#                     },
+#                     "required": [
+#                         "location"
+#                     ]
+#                 }
+#             )
+#         ]
+#     )
+
+#     assert isinstance(response, LLMResult)
+#     assert len(response.message.content) > 0
+#     assert response.usage.total_tokens > 0
+#     assert response.message.tool_calls[0].function.name == 'get_current_weather'
+
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
+def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
+    """Validate generation-model credentials: two sets must be rejected, one must pass."""
+    llm = XinferenceAILargeLanguageModel()
+    server_url = os.environ.get('XINFERENCE_SERVER_URL')
+    generation_uid = os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
+
+    # Configured server, but the model uid is prefixed with 'www ': must be rejected.
+    with pytest.raises(CredentialsValidateFailedError):
+        llm.validate_credentials(
+            model='alapaca',
+            credentials={
+                'server_url': server_url,
+                'model_uid': 'www ' + generation_uid,
+            },
+        )
+
+    # Empty server url and empty model uid: must be rejected as well.
+    with pytest.raises(CredentialsValidateFailedError):
+        llm.validate_credentials(
+            model='alapaca',
+            credentials={'server_url': '', 'model_uid': ''},
+        )
+
+    # Values taken unchanged from the environment: the call must not raise.
+    llm.validate_credentials(
+        model='alapaca',
+        credentials={'server_url': server_url, 'model_uid': generation_uid},
+    )
+
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
+def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
+    """Invoke the generation model with stream=False and check the returned LLMResult."""
+    llm = XinferenceAILargeLanguageModel()
+    credentials = {
+        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
+        'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
+    }
+    prompt = UserPromptMessage(content='the United States is')
+
+    # Non-streaming call: the whole completion is asserted to arrive as one object.
+    result = llm.invoke(
+        model='alapaca',
+        credentials=credentials,
+        prompt_messages=[prompt],
+        model_parameters={
+            'temperature': 0.7,
+            'top_p': 1.0,
+        },
+        stop=['you'],
+        user="abc-123",
+        stream=False,
+    )
+
+    assert isinstance(result, LLMResult)
+    assert len(result.message.content) > 0
+    assert result.usage.total_tokens > 0
+
+@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
+def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
+    """Invoke the generation model with stream=True and check every yielded chunk."""
+    llm = XinferenceAILargeLanguageModel()
+    credentials = {
+        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
+        'model_uid': os.environ.get('XINFERENCE_GENERATION_MODEL_UID'),
+    }
+    prompt = UserPromptMessage(content='the United States is')
+
+    # Streaming call: the return value is asserted to be a generator of chunks.
+    chunks = llm.invoke(
+        model='alapaca',
+        credentials=credentials,
+        prompt_messages=[prompt],
+        model_parameters={'temperature': 0.7, 'top_p': 1.0},
+        stop=['you'],
+        stream=True,
+        user="abc-123",
+    )
+
+    assert isinstance(chunks, Generator)
+    for piece in chunks:
+        assert isinstance(piece, LLMResultChunk)
+        delta = piece.delta
+        assert isinstance(delta, LLMResultChunkDelta)
+        assert isinstance(delta.message, AssistantPromptMessage)
+        # Text is only required while no finish reason has been reported.
+        if delta.finish_reason is None:
+            assert len(delta.message.content) > 0
+
+def test_get_num_tokens():
+    """Count prompt tokens twice: once with a tool definition, once without."""
+    llm = XinferenceAILargeLanguageModel()
+    server_url = os.environ.get('XINFERENCE_SERVER_URL')
+    model_uid = os.environ.get('XINFERENCE_GENERATION_MODEL_UID')
+
+    # Argument schema handed to the tool definition of the first count.
+    weather_schema = {
+        "type": "object",
+        "properties": {
+            "location": {
+                "type": "string",
+                "description": "The city and state e.g. San Francisco, CA",
+            },
+            "unit": {
+                "type": "string",
+                "enum": [
+                    "c",
+                    "f",
+                ],
+            },
+        },
+        "required": [
+            "location",
+        ],
+    }
+
+    # First count: system and user message plus one tool.
+    tokens_with_tool = llm.get_num_tokens(
+        model='ChatGLM3',
+        credentials={
+            'server_url': server_url,
+            'model_uid': model_uid,
+        },
+        prompt_messages=[
+            SystemPromptMessage(content='You are a helpful AI assistant.'),
+            UserPromptMessage(content='Hello World!'),
+        ],
+        tools=[
+            PromptMessageTool(
+                name='get_current_weather',
+                description='Get the current weather in a given location',
+                parameters=weather_schema,
+            ),
+        ],
+    )
+
+    assert isinstance(tokens_with_tool, int)
+    assert tokens_with_tool == 77
+
+    # Second count: the same two messages, no tools argument.
+    tokens_without_tool = llm.get_num_tokens(
+        model='ChatGLM3',
+        credentials={
+            'server_url': server_url,
+            'model_uid': model_uid,
+        },
+        prompt_messages=[
+            SystemPromptMessage(content='You are a helpful AI assistant.'),
+            UserPromptMessage(content='Hello World!'),
+        ],
+    )
+
+    assert isinstance(tokens_without_tool, int)
+    assert tokens_without_tool == 21