feat: optimize xinference stream (#989)

This commit is contained in:
takatost
2023-08-24 13:58:34 +08:00
committed by GitHub
parent 53e83d8697
commit 18d3877151

View File

@@ -108,12 +108,12 @@ class XinferenceLLM(Xinference):
         Yields:
             A string token.
         """
-        if isinstance(model, RESTfulGenerateModelHandle):
+        if isinstance(model, (RESTfulChatModelHandle, RESTfulChatglmCppChatModelHandle)):
-            streaming_response = model.generate(
+            streaming_response = model.chat(
                 prompt=prompt, generate_config=generate_config
             )
         else:
-            streaming_response = model.chat(
+            streaming_response = model.generate(
                 prompt=prompt, generate_config=generate_config
             )