feat(large_language_model): Adds plugin-based token counting configuration option (#17706)

Signed-off-by: -LAN- <laipz8200@outlook.com>
Co-authored-by: Yeuoly <admin@srmxy.cn>
@@ -326,6 +326,7 @@ UPLOAD_AUDIO_FILE_SIZE_LIMIT=50
 MULTIMODAL_SEND_FORMAT=base64
 PROMPT_GENERATION_MAX_TOKENS=512
 CODE_GENERATION_MAX_TOKENS=1024
+PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false
 
 # Mail configuration, support: resend, smtp
 MAIL_TYPE=

@@ -442,7 +442,7 @@ class LoggingConfig(BaseSettings):
 
 class ModelLoadBalanceConfig(BaseSettings):
     """
-    Configuration for model load balancing
+    Configuration for model load balancing and token counting
     """
 
     MODEL_LB_ENABLED: bool = Field(

@@ -450,6 +450,11 @@ class ModelLoadBalanceConfig(BaseSettings):
         default=False,
     )
 
+    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = Field(
+        description="Enable or disable plugin based token counting. If disabled, token counting will return 0.",
+        default=False,
+    )
+
 
 class BillingConfig(BaseSettings):
     """

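The new flag lives alongside `MODEL_LB_ENABLED` on `ModelLoadBalanceConfig`, so it is populated from the process environment like every other pydantic-settings field. A minimal sketch of that resolution, using a hypothetical standalone class rather than Dify's real config module:

```python
import os

from pydantic import Field
from pydantic_settings import BaseSettings


class TokenCountingConfig(BaseSettings):
    # Hypothetical standalone class mirroring only the field added above.
    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = Field(
        description="Enable or disable plugin based token counting.",
        default=False,
    )


# With the variable unset the field keeps its default of False.
print(TokenCountingConfig().PLUGIN_BASED_TOKEN_COUNTING_ENABLED)  # False

os.environ["PLUGIN_BASED_TOKEN_COUNTING_ENABLED"] = "true"
print(TokenCountingConfig().PLUGIN_BASED_TOKEN_COUNTING_ENABLED)  # True
```
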
@@ -53,20 +53,6 @@ class AgentChatAppRunner(AppRunner):
         query = application_generate_entity.query
         files = application_generate_entity.files
 
-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=dict(inputs),
-            files=list(files),
-            query=query,
-        )
-
         memory = None
         if application_generate_entity.conversation_id:
             # get memory of conversation (read-only)

@@ -61,20 +61,6 @@ class ChatAppRunner(AppRunner):
         )
         image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW
 
-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
         memory = None
         if application_generate_entity.conversation_id:
             # get memory of conversation (read-only)

@@ -54,20 +54,6 @@ class CompletionAppRunner(AppRunner):
         )
         image_detail_config = image_detail_config or ImagePromptMessageContent.DETAIL.LOW
 
-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
         # organize all inputs and template to prompt messages
         # Include: prompt template, inputs, query(optional), files(optional)
         prompt_messages, stop = self.organize_prompt_messages(

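The same pre-flight check is removed from the agent-chat, chat, and completion runners, so over-length prompts are no longer rejected before the model is invoked. The deleted comments describe what the call did: subtract the prompt's token count and the configured max tokens from the model's context size, and raise if nothing is left. A hedged sketch of that budgeting arithmetic, with illustrative names rather than the actual `get_pre_calculate_rest_tokens` body:

```python
def pre_calculate_rest_tokens(context_size: int, max_tokens: int, prompt_tokens: int) -> int:
    # Illustrative only: the real AppRunner helper resolves context_size and
    # max_tokens from the model configuration before doing this arithmetic.
    rest_tokens = context_size - max_tokens - prompt_tokens
    if rest_tokens < 0:
        raise ValueError(
            f"prompt needs {prompt_tokens} tokens, but only "
            f"{context_size - max_tokens} remain within the context window"
        )
    return rest_tokens
```

Note that with plugin-based counting disabled the prompt token count is always 0, so this guard could no longer reject anything anyway.
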
@@ -192,7 +192,7 @@ def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[Pr
 ```
 
 
-Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
+Sometimes, you might not want to return 0 directly. In such cases, you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens and ensure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method is provided by the `AIModel` base class, and it uses GPT2's Tokenizer for calculation. However, it should be noted that this is only a substitute and may not be fully accurate.
 
 - Model Credentials Validation
 

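For plugin authors following that advice, here is a standalone approximation of a GPT-2 fallback count. `_get_num_tokens_by_gpt2` is the real `AIModel` helper the docs name; the sketch below uses Hugging Face's tokenizer and is an assumption about comparable behavior, not Dify's implementation:

```python
from transformers import GPT2TokenizerFast

# Loaded once; the pretrained "gpt2" vocabulary is what makes this a
# rough substitute rather than an exact count for other models.
_gpt2_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")


def approx_num_tokens(text: str) -> int:
    # Token count is simply the length of the encoded id sequence.
    return len(_gpt2_tokenizer.encode(text))
```
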
@@ -179,7 +179,7 @@ provider_credential_schema:
     """
 ```
 
-Sometimes, you may not want to return 0 directly, so you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens. This method lives in the `AIModel` base class; it uses GPT2's Tokenizer for the calculation, but it is only a substitute and is not fully accurate.
+Sometimes, you may not want to return 0 directly, so you can use `self._get_num_tokens_by_gpt2(text: str)` to get pre-computed tokens, making sure the environment variable `PLUGIN_BASED_TOKEN_COUNTING_ENABLED` is set to `true`. This method lives in the `AIModel` base class; it uses GPT2's Tokenizer for the calculation, but it is only a substitute and is not fully accurate.
 
 - Model Credentials Validation
 

@@ -295,18 +295,20 @@ class LargeLanguageModel(AIModel):
         :param tools: tools for tool calling
         :return:
         """
-        plugin_model_manager = PluginModelManager()
-        return plugin_model_manager.get_llm_num_tokens(
-            tenant_id=self.tenant_id,
-            user_id="unknown",
-            plugin_id=self.plugin_id,
-            provider=self.provider_name,
-            model_type=self.model_type.value,
-            model=model,
-            credentials=credentials,
-            prompt_messages=prompt_messages,
-            tools=tools,
-        )
+        if dify_config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
+            plugin_model_manager = PluginModelManager()
+            return plugin_model_manager.get_llm_num_tokens(
+                tenant_id=self.tenant_id,
+                user_id="unknown",
+                plugin_id=self.plugin_id,
+                provider=self.provider_name,
+                model_type=self.model_type.value,
+                model=model,
+                credentials=credentials,
+                prompt_messages=prompt_messages,
+                tools=tools,
+            )
+        return 0
 
     def _calc_response_usage(
         self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int

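With this guard in place, every token-count call short-circuits to 0 unless the flag is on, skipping the plugin round trip entirely. A minimal sketch of the same pattern with stand-in objects; nothing here besides the flag name and the early-return shape comes from the diff:

```python
from dataclasses import dataclass


@dataclass
class StubConfig:
    # Stand-in for dify_config; only the flag name comes from the diff.
    PLUGIN_BASED_TOKEN_COUNTING_ENABLED: bool = False


def get_num_tokens(config: StubConfig, text: str) -> int:
    if config.PLUGIN_BASED_TOKEN_COUNTING_ENABLED:
        return plugin_count(text)
    return 0  # disabled: callers must treat 0 as "not counted"


def plugin_count(text: str) -> int:
    # Placeholder for the plugin manager RPC that counts real tokens.
    return len(text.split())
```
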
@@ -616,6 +616,11 @@ PROMPT_GENERATION_MAX_TOKENS=512
 # Default: 1024 tokens.
 CODE_GENERATION_MAX_TOKENS=1024
 
+# Enable or disable plugin based token counting. If disabled, token counting will return 0.
+# This can improve performance by skipping token counting operations.
+# Default: false (disabled).
+PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false
+
 # ------------------------------
 # Multi-modal Configuration
 # ------------------------------

@@ -276,6 +276,7 @@ x-shared-env: &shared-api-worker-env
   SCARF_NO_ANALYTICS: ${SCARF_NO_ANALYTICS:-true}
   PROMPT_GENERATION_MAX_TOKENS: ${PROMPT_GENERATION_MAX_TOKENS:-512}
   CODE_GENERATION_MAX_TOKENS: ${CODE_GENERATION_MAX_TOKENS:-1024}
+  PLUGIN_BASED_TOKEN_COUNTING_ENABLED: ${PLUGIN_BASED_TOKEN_COUNTING_ENABLED:-false}
   MULTIMODAL_SEND_FORMAT: ${MULTIMODAL_SEND_FORMAT:-base64}
   UPLOAD_IMAGE_FILE_SIZE_LIMIT: ${UPLOAD_IMAGE_FILE_SIZE_LIMIT:-10}
   UPLOAD_VIDEO_FILE_SIZE_LIMIT: ${UPLOAD_VIDEO_FILE_SIZE_LIMIT:-100}
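
In the compose file the variable uses the usual `${VAR:-default}` interpolation, so the container sees `false` whenever the host leaves it unset or empty. An assumed model of that substitution in Python:

```python
import os


def compose_default(name: str, default: str) -> str:
    # ${VAR:-default} falls back when the variable is unset *or* empty.
    value = os.environ.get(name)
    return value if value else default


print(compose_default("PLUGIN_BASED_TOKEN_COUNTING_ENABLED", "false"))  # "false"
```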