feat: app rate limit (#5844)

Co-authored-by: liuzhenghua-jk <liuzhenghua-jk@360shuke.com>
Co-authored-by: takatost <takatost@gmail.com>
This commit is contained in:
liuzhenghua
2024-07-10 13:31:35 +00:00
committed by GitHub
parent cc8dc6d35e
commit 9622fbb62f
19 changed files with 277 additions and 56 deletions

View File

@@ -7,6 +7,7 @@ from core.app.apps.chat.app_generator import ChatAppGenerator
from core.app.apps.completion.app_generator import CompletionAppGenerator
from core.app.apps.workflow.app_generator import WorkflowAppGenerator
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.features.rate_limiting import RateLimit
from models.model import Account, App, AppMode, EndUser
from services.workflow_service import WorkflowService
@@ -29,52 +30,68 @@ class AppGenerateService:
:param streaming: streaming
:return:
"""
if app_model.mode == AppMode.COMPLETION.value:
return CompletionAppGenerator().generate(
app_model=app_model,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
)
elif app_model.mode == AppMode.AGENT_CHAT.value or app_model.is_agent:
return AgentChatAppGenerator().generate(
app_model=app_model,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
)
elif app_model.mode == AppMode.CHAT.value:
return ChatAppGenerator().generate(
app_model=app_model,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
)
elif app_model.mode == AppMode.ADVANCED_CHAT.value:
workflow = cls._get_workflow(app_model, invoke_from)
return AdvancedChatAppGenerator().generate(
app_model=app_model,
workflow=workflow,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
)
elif app_model.mode == AppMode.WORKFLOW.value:
workflow = cls._get_workflow(app_model, invoke_from)
return WorkflowAppGenerator().generate(
app_model=app_model,
workflow=workflow,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
)
else:
raise ValueError(f'Invalid app mode {app_model.mode}')
max_active_request = AppGenerateService._get_max_active_requests(app_model)
rate_limit = RateLimit(app_model.id, max_active_request)
request_id = RateLimit.gen_request_key()
try:
request_id = rate_limit.enter(request_id)
if app_model.mode == AppMode.COMPLETION.value:
return rate_limit.generate(CompletionAppGenerator().generate(
app_model=app_model,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
), request_id)
elif app_model.mode == AppMode.AGENT_CHAT.value or app_model.is_agent:
return rate_limit.generate(AgentChatAppGenerator().generate(
app_model=app_model,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
), request_id)
elif app_model.mode == AppMode.CHAT.value:
return rate_limit.generate(ChatAppGenerator().generate(
app_model=app_model,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
), request_id)
elif app_model.mode == AppMode.ADVANCED_CHAT.value:
workflow = cls._get_workflow(app_model, invoke_from)
return rate_limit.generate(AdvancedChatAppGenerator().generate(
app_model=app_model,
workflow=workflow,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
), request_id)
elif app_model.mode == AppMode.WORKFLOW.value:
workflow = cls._get_workflow(app_model, invoke_from)
return rate_limit.generate(WorkflowAppGenerator().generate(
app_model=app_model,
workflow=workflow,
user=user,
args=args,
invoke_from=invoke_from,
stream=streaming
), request_id)
else:
raise ValueError(f'Invalid app mode {app_model.mode}')
finally:
if not streaming:
rate_limit.exit(request_id)
@staticmethod
def _get_max_active_requests(app_model: App) -> int:
max_active_requests = app_model.max_active_requests
if app_model.max_active_requests is None:
from flask import current_app
max_active_requests = int(current_app.config['APP_MAX_ACTIVE_REQUESTS'])
return max_active_requests
@classmethod
def generate_single_iteration(cls, app_model: App,