Initial commit

2023-05-15 08:51:32 +08:00
commit db896255d6
744 changed files with 56028 additions and 0 deletions
--- a/api/core/prompt/output_parser/suggested_questions_after_answer.py
+++ b/api/core/prompt/output_parser/suggested_questions_after_answer.py
@@ -0,0 +1,16 @@
+import json
+from typing import Any
+
+from langchain.schema import BaseOutputParser
+from core.prompt.prompts import SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT
+
+
+class SuggestedQuestionsAfterAnswerOutputParser(BaseOutputParser):
+    """Output parser for the suggested-questions prompt, which asks for a JSON array."""
+
+    def get_format_instructions(self) -> str:
+        """Return the instruction text that describes the expected JSON output."""
+        return SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT
+
+    def parse(self, text: str) -> Any:
+        """Decode the model reply as JSON.
+
+        The stripped text is decoded as-is first. If that fails and the text
+        contains a ``[...]`` span, the span from the first ``[`` to the last
+        ``]`` is decoded instead, so a reply wrapped in a code fence or
+        surrounded by extra prose can still be parsed.
+
+        Raises:
+            json.JSONDecodeError: if neither attempt yields valid JSON.
+        """
+        json_string = text.strip()
+        try:
+            return json.loads(json_string)
+        except json.JSONDecodeError:
+            start = json_string.find('[')
+            end = json_string.rfind(']')
+            if start == -1 or end <= start:
+                # No bracketed span to fall back on: surface the original error.
+                raise
+            return json.loads(json_string[start:end + 1])
--- a/api/core/prompt/prompt_builder.py
+++ b/api/core/prompt/prompt_builder.py
@@ -0,0 +1,37 @@
+import re
+
+from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
+from langchain.schema import BaseMessage
+
+from core.prompt.prompt_template import OutLinePromptTemplate
+
+
+class PromptBuilder:
+    """Build chat messages from prompt text and a mapping of input values."""
+
+    @classmethod
+    def to_system_message(cls, prompt_content: str, inputs: dict) -> BaseMessage:
+        """Format ``prompt_content`` as a system message.
+
+        Only the keys of ``inputs`` that the template declares as input
+        variables are passed to the formatter; any other keys are ignored.
+        """
+        return cls._format_message(SystemMessagePromptTemplate, prompt_content, inputs)
+
+    @classmethod
+    def to_ai_message(cls, prompt_content: str, inputs: dict) -> BaseMessage:
+        """Format ``prompt_content`` as an AI message.
+
+        Only the keys of ``inputs`` that the template declares as input
+        variables are passed to the formatter; any other keys are ignored.
+        """
+        return cls._format_message(AIMessagePromptTemplate, prompt_content, inputs)
+
+    @classmethod
+    def to_human_message(cls, prompt_content: str, inputs: dict) -> BaseMessage:
+        """Format ``prompt_content`` as a human message.
+
+        Only the keys of ``inputs`` that the template declares as input
+        variables are passed to the formatter, the same filtering that
+        ``to_system_message`` and ``to_ai_message`` apply.
+        """
+        return cls._format_message(HumanMessagePromptTemplate, prompt_content, inputs)
+
+    @classmethod
+    def _format_message(cls, message_template_cls, prompt_content: str, inputs: dict) -> BaseMessage:
+        """Build a message template of the given class and format it with the declared inputs."""
+        prompt_template = OutLinePromptTemplate.from_template(prompt_content)
+        message_template = message_template_cls(prompt=prompt_template)
+        # Drop keys the template does not reference before formatting.
+        prompt_inputs = {k: inputs[k] for k in message_template.input_variables if k in inputs}
+        return message_template.format(**prompt_inputs)
+
+    @classmethod
+    def process_template(cls, template: str) -> str:
+        """Rewrite the brace markers in ``template`` with two regex passes.
+
+        The first pass replaces every ``{...}`` match with its inner text; the
+        second rewrites any ``{{...}}`` that is still present as ``{...}``.
+        For example ``"Hello {{name}} and {foo}"`` becomes
+        ``"Hello {name} and foo"``.
+        """
+        # Pass order matters: the lazy first pattern also consumes the opening
+        # half of a doubled brace, which is what leaves a single pair behind.
+        processed_template = re.sub(r'\{(.+?)\}', r'\1', template)
+        processed_template = re.sub(r'\{\{(.+?)\}\}', r'{\1}', processed_template)
+        return processed_template
--- a/api/core/prompt/prompt_template.py
+++ b/api/core/prompt/prompt_template.py
@@ -0,0 +1,37 @@
+import re
+from typing import Any
+
+from langchain import PromptTemplate
+from langchain.formatting import StrictFormatter
+
+
+class OutLinePromptTemplate(PromptTemplate):
+    """Prompt template whose input variables are found with OneLineFormatter."""
+
+    @classmethod
+    def from_template(cls, template: str, **kwargs: Any) -> PromptTemplate:
+        """Create a template, deriving its input variables from ``template``.
+
+        The variable names are the non-None field names reported by
+        ``OneLineFormatter().parse``, de-duplicated and sorted.
+        """
+        parsed = OneLineFormatter().parse(template)
+        variable_names = {field for _, field, _, _ in parsed if field is not None}
+        return cls(input_variables=sorted(variable_names), template=template, **kwargs)
+
+
+class OneLineFormatter(StrictFormatter):
+    """Formatter that only treats ``{identifier}`` as a replacement field."""
+
+    def parse(self, format_string):
+        """Split ``format_string`` into (literal, field, spec, conversion) tuples.
+
+        Each ``{identifier}`` match yields the literal text before it, the
+        identifier, an empty format spec and no conversion. Any text left
+        after the last match is returned as a final literal-only tuple.
+        """
+        pieces = []
+        cursor = 0
+        for found in re.finditer(r"{([a-zA-Z_]\w*)}", format_string):
+            leading_text = format_string[cursor:found.start()]
+            pieces.append((leading_text, found.group(1), '', None))
+            cursor = found.end()
+
+        tail = format_string[cursor:]
+        if tail:
+            pieces.append((tail, None, None, None))
+        return pieces
--- a/api/core/prompt/prompts.py
+++ b/api/core/prompt/prompts.py
@@ -0,0 +1,63 @@
+from llama_index import QueryKeywordExtractPrompt
+
+# Prompt that asks for a title of at most 20 words; placeholder: {query}.
+CONVERSATION_TITLE_PROMPT = (
+    "Human:{query}\n"
+    "-----\n"
+    "Help me summarize the intent of what the human said and provide a title, "
+    "the title should not exceed 20 words.\n"
+    "If the human said is conducted in Chinese, you should return a Chinese title.\n"
+    "If the human said is conducted in English, you should return an English title.\n"
+    "title:"
+)
+
+# Prompt that asks for a short conversation summary; placeholder: {context}.
+CONVERSATION_SUMMARY_PROMPT = (
+    "Please generate a short summary of the following conversation.\n"
+    "If the conversation communicating in Chinese, "
+    "you should return a Chinese summary.\n"
+    "If the conversation communicating in English, "
+    "you should return an English summary.\n"
+    "[Conversation Start]\n{context}\n[Conversation End]\n\n"
+    "summary:"
+)
+
+# Prompt that asks for a greeting of no more than 50 words; placeholder: {prompt}.
+INTRODUCTION_GENERATE_PROMPT = (
+    "I am designing a product for users to interact with an AI through dialogue. "
+    "The Prompt given to the AI before the conversation is:\n"
+    "\n"
+    "```\n"
+    "{prompt}\n"
+    "```\n"
+    "\n"
+    "Please generate a brief introduction of no more than 50 words "
+    "that greets the user, based on this Prompt. "
+    "Do not reveal the developer's motivation or deep logic behind the Prompt, "
+    "but focus on building a relationship with the user:\n"
+)
+
+# Prompt that asks for a variation on an earlier completion;
+# placeholders: {original_completion} and {prompt}.
+MORE_LIKE_THIS_GENERATE_PROMPT = (
+    "-----\n{original_completion}\n-----\n\n"
+    "Please use the above content as a sample for generating the result, and "
+    "include key information points related to the original sample in the result. "
+    "Try to rephrase this information in different ways "
+    "and predict according to the rules below.\n\n"
+    "-----\n{prompt}\n"
+)
+
+# Instruction text asking for three follow-up questions as a JSON array of strings.
+SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
+    "Please help me predict the three most likely questions "
+    "that human would ask, and keeping each question under 20 characters.\n"
+    "The output must be in JSON format following the specified schema:\n"
+    '["question1","question2","question3"]\n'
+)
+
+# Text of the keyword-extraction prompt; placeholders: {max_keywords} and {question}.
+QUERY_KEYWORD_EXTRACT_TEMPLATE_TMPL = (
+    "A question is provided below. Given the question, extract up to {max_keywords} "
+    "keywords from the text. Focus on extracting the keywords that we can use "
+    # Trailing space keeps this sentence from running into the next fragment.
+    "to best lookup answers to the question. Avoid stopwords. "
+    "I am not sure which language the following question is in. "
+    "If the user asked the question in Chinese, please return the keywords in Chinese. "
+    "If the user asked the question in English, please return the keywords in English.\n"
+    "---------------------\n"
+    "{question}\n"
+    "---------------------\n"
+    "Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'\n"
+)
+
+# Prompt object built from the keyword-extraction text defined in this module.
+QUERY_KEYWORD_EXTRACT_TEMPLATE = QueryKeywordExtractPrompt(QUERY_KEYWORD_EXTRACT_TEMPLATE_TMPL)