Feat/assistant app (#2086)

Co-authored-by: chenhe <guchenhe@gmail.com>
Co-authored-by: Pascal M <11357019+perzeuss@users.noreply.github.com>
This commit is contained in:
Yeuoly
2024-01-23 19:58:23 +08:00
committed by GitHub
parent 7bbe12b2bd
commit 86286e1ac8
175 changed files with 11619 additions and 1235 deletions

View File

@@ -0,0 +1,2 @@
class InvokeModelError(Exception):
pass

View File

@@ -0,0 +1,174 @@
"""
For some reason, model will be used in tools like WebScraperTool, WikipediaSearchTool etc.
Therefore, a model manager is needed to list/invoke/validate models.
"""
from core.model_runtime.entities.message_entities import PromptMessage
from core.model_runtime.entities.llm_entities import LLMResult
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel, ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeRateLimitError, InvokeBadRequestError, \
InvokeConnectionError, InvokeAuthorizationError, InvokeServerUnavailableError
from core.model_runtime.utils.encoders import jsonable_encoder
from core.model_manager import ModelManager
from core.tools.model.errors import InvokeModelError
from extensions.ext_database import db
from models.tools import ToolModelInvoke
from typing import List, cast
import json
class ToolModelManager:
@staticmethod
def get_max_llm_context_tokens(
tenant_id: str,
) -> int:
"""
get max llm context tokens of the model
"""
model_manager = ModelManager()
model_instance = model_manager.get_default_model_instance(
tenant_id=tenant_id, model_type=ModelType.LLM,
)
if not model_instance:
raise InvokeModelError(f'Model not found')
llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
schema = llm_model.get_model_schema(model_instance.model, model_instance.credentials)
if not schema:
raise InvokeModelError(f'No model schema found')
max_tokens = schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE, None)
if max_tokens is None:
return 2048
return max_tokens
@staticmethod
def calculate_tokens(
tenant_id: str,
prompt_messages: List[PromptMessage]
) -> int:
"""
calculate tokens from prompt messages and model parameters
"""
# get model instance
model_manager = ModelManager()
model_instance = model_manager.get_default_model_instance(
tenant_id=tenant_id, model_type=ModelType.LLM
)
if not model_instance:
raise InvokeModelError(f'Model not found')
llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
# get tokens
tokens = llm_model.get_num_tokens(model_instance.model, model_instance.credentials, prompt_messages)
return tokens
@staticmethod
def invoke(
user_id: str, tenant_id: str,
tool_type: str, tool_name: str,
prompt_messages: List[PromptMessage]
) -> LLMResult:
"""
invoke model with parameters in user's own context
:param user_id: user id
:param tenant_id: tenant id, the tenant id of the creator of the tool
:param tool_provider: tool provider
:param tool_id: tool id
:param tool_name: tool name
:param provider: model provider
:param model: model name
:param model_parameters: model parameters
:param prompt_messages: prompt messages
:return: AssistantPromptMessage
"""
# get model manager
model_manager = ModelManager()
# get model instance
model_instance = model_manager.get_default_model_instance(
tenant_id=tenant_id, model_type=ModelType.LLM,
)
llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
# get model credentials
model_credentials = model_instance.credentials
# get prompt tokens
prompt_tokens = llm_model.get_num_tokens(model_instance.model, model_credentials, prompt_messages)
model_parameters = {
'temperature': 0.8,
'top_p': 0.8,
}
# create tool model invoke
tool_model_invoke = ToolModelInvoke(
user_id=user_id,
tenant_id=tenant_id,
provider=model_instance.provider,
tool_type=tool_type,
tool_name=tool_name,
model_parameters=json.dumps(model_parameters),
prompt_messages=json.dumps(jsonable_encoder(prompt_messages)),
model_response='',
prompt_tokens=prompt_tokens,
answer_tokens=0,
answer_unit_price=0,
answer_price_unit=0,
provider_response_latency=0,
total_price=0,
currency='USD',
)
db.session.add(tool_model_invoke)
db.session.commit()
try:
response: LLMResult = llm_model.invoke(
model=model_instance.model,
credentials=model_credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=[], stop=[], stream=False, user=user_id, callbacks=[]
)
except InvokeRateLimitError as e:
raise InvokeModelError(f'Invoke rate limit error: {e}')
except InvokeBadRequestError as e:
raise InvokeModelError(f'Invoke bad request error: {e}')
except InvokeConnectionError as e:
raise InvokeModelError(f'Invoke connection error: {e}')
except InvokeAuthorizationError as e:
raise InvokeModelError(f'Invoke authorization error')
except InvokeServerUnavailableError as e:
raise InvokeModelError(f'Invoke server unavailable error: {e}')
except Exception as e:
raise InvokeModelError(f'Invoke error: {e}')
# update tool model invoke
tool_model_invoke.model_response = response.message.content
if response.usage:
tool_model_invoke.answer_tokens = response.usage.completion_tokens
tool_model_invoke.answer_unit_price = response.usage.completion_unit_price
tool_model_invoke.answer_price_unit = response.usage.completion_price_unit
tool_model_invoke.provider_response_latency = response.usage.latency
tool_model_invoke.total_price = response.usage.total_price
tool_model_invoke.currency = response.usage.currency
db.session.commit()
return response