Unify the reasoning model thinking tag as <think></think> (#13392)

Co-authored-by: crazywoola <427733928@qq.com>
2025-02-08 16:19:41 +08:00
parent 78708eb5d5
commit 286cdc41ab
5 changed files with 26 additions and 25 deletions
--- a/api/core/model_runtime/model_providers/__base/large_language_model.py
+++ b/api/core/model_runtime/model_providers/__base/large_language_model.py
@@ -30,11 +30,6 @@ from core.model_runtime.model_providers.__base.ai_model import AIModel

 logger = logging.getLogger(__name__)

-HTML_THINKING_TAG = (
-    '<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> '
-    "<summary> Thinking... </summary>"
-)
-

 class LargeLanguageModel(AIModel):
    """
@@ -408,7 +403,7 @@ if you are not sure about the structure.
    def _wrap_thinking_by_reasoning_content(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
        """
        If the reasoning response is from delta.get("reasoning_content"), we wrap
-        it with HTML details tag.
+        it with HTML think tag.

        :param delta: delta dictionary from LLM streaming response
        :param is_reasoning: is reasoning
@@ -420,25 +415,15 @@ if you are not sure about the structure.

        if reasoning_content:
            if not is_reasoning:
-                content = HTML_THINKING_TAG + reasoning_content
+                content = "<think>\n" + reasoning_content
                is_reasoning = True
            else:
                content = reasoning_content
        elif is_reasoning:
-            content = "</details>" + content
+            content = "\n</think>" + content
            is_reasoning = False
        return content, is_reasoning

-    def _wrap_thinking_by_tag(self, content: str) -> str:
-        """
-        if the reasoning response is a <think>...</think> block from delta.get("content"),
-        we replace <think> to <detail>.
-
-        :param content: delta.get("content")
-        :return: processed_content
-        """
-        return content.replace("<think>", HTML_THINKING_TAG).replace("</think>", "</details>")
-
    def _invoke_result_generator(
        self,
        model: str,
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -367,7 +367,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):

                # transform assistant message to prompt message
                text = chunk_json["response"]
-            text = self._wrap_thinking_by_tag(text)

            assistant_prompt_message = AssistantPromptMessage(content=text)

--- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py
@@ -528,7 +528,6 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
                    delta_content, is_reasoning_started = self._wrap_thinking_by_reasoning_content(
                        delta, is_reasoning_started
                    )
-                    delta_content = self._wrap_thinking_by_tag(delta_content)

                    assistant_message_tool_calls = None

--- a/api/core/model_runtime/model_providers/xinference/llm/llm.py
+++ b/api/core/model_runtime/model_providers/xinference/llm/llm.py
@@ -654,7 +654,6 @@ class XinferenceAILargeLanguageModel(LargeLanguageModel):
            if function_call:
                assistant_message_tool_calls += [self._extract_response_function_call(function_call)]

-            delta_content = self._wrap_thinking_by_tag(delta_content)
            # transform assistant message to prompt message
            assistant_prompt_message = AssistantPromptMessage(
                content=delta_content or "", tool_calls=assistant_message_tool_calls