fix: first agent latency (#2334)

2024-02-01 15:30:50 +08:00
parent 09acf215f0
commit 3b357f51a6
1 changed file with 6 additions and 1 deletion
--- a/api/core/features/assistant_fc_runner.py
+++ b/api/core/features/assistant_fc_runner.py
@@ -97,7 +97,6 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
                tool_input='',
                messages_ids=message_file_ids
            )
-            self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)

            # recale llm max tokens
            self.recale_llm_max_tokens(self.model_config, prompt_messages)
@@ -124,7 +123,11 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
            current_llm_usage = None

            if self.stream_tool_call:
+                is_first_chunk = True
                for chunk in chunks:
+                    if is_first_chunk:
+                        self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
+                        is_first_chunk = False
                    # check if there is any tool call
                    if self.check_tool_calls(chunk):
                        function_call_state = True
@@ -183,6 +186,8 @@ class AssistantFunctionCallApplicationRunner(BaseAssistantApplicationRunner):
                if not result.message.content:
                    result.message.content = ''

+                self.queue_manager.publish_agent_thought(agent_thought, PublishFrom.APPLICATION_MANAGER)
+                
                yield LLMResultChunk(
                    model=model_instance.model,
                    prompt_messages=result.prompt_messages,