fix: first agent latency (#2334)
This commit is contained in:
@@ -97,7 +97,6 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
|
|||||||
tool_input='',
|
tool_input='',
|
||||||
messages_ids=message_file_ids
|
messages_ids=message_file_ids
|
||||||
)
|
)
|
||||||
self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
|
|
||||||
|
|
||||||
# recale llm max tokens
|
# recale llm max tokens
|
||||||
self.recale_llm_max_tokens(self.model_config, prompt_messages)
|
self.recale_llm_max_tokens(self.model_config, prompt_messages)
|
||||||
@@ -124,7 +123,11 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
|
|||||||
current_llm_usage = None
|
current_llm_usage = None
|
||||||
|
|
||||||
if self.stream_tool_call:
|
if self.stream_tool_call:
|
||||||
|
is_first_chunk = True
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
|
if is_first_chunk:
|
||||||
|
self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
|
||||||
|
is_first_chunk = False
|
||||||
# check if there is any tool call
|
# check if there is any tool call
|
||||||
if self.check_tool_calls(chunk):
|
if self.check_tool_calls(chunk):
|
||||||
function_call_state = True
|
function_call_state = True
|
||||||
@@ -183,6 +186,8 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
|
|||||||
if not result.message.content:
|
if not result.message.content:
|
||||||
result.message.content = ''
|
result.message.content = ''
|
||||||
|
|
||||||
|
self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
|
||||||
|
|
||||||
yield LLMResultChunk(
|
yield LLMResultChunk(
|
||||||
model=model_instance.model,
|
model=model_instance.model,
|
||||||
prompt_messages=result.prompt_messages,
|
prompt_messages=result.prompt_messages,
|
||||||
|
Reference in New Issue
Block a user