feat: backend model load balancing support (#4927)

This commit is contained in:
takatost
2024-06-05 00:13:04 +08:00
committed by GitHub
parent 52ec152dd3
commit d1dbbc1e33
47 changed files with 2191 additions and 256 deletions

View File

@@ -12,6 +12,7 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.utils.encoders import jsonable_encoder
from libs.login import login_required
from models.account import TenantAccountRole
from services.model_load_balancing_service import ModelLoadBalancingService
from services.model_provider_service import ModelProviderService
@@ -104,21 +105,56 @@ class ModelProviderModelApi(Resource):
parser.add_argument('model', type=str, required=True, nullable=False, location='json')
parser.add_argument('model_type', type=str, required=True, nullable=False,
choices=[mt.value for mt in ModelType], location='json')
parser.add_argument('credentials', type=dict, required=True, nullable=False, location='json')
parser.add_argument('credentials', type=dict, required=False, nullable=True, location='json')
parser.add_argument('load_balancing', type=dict, required=False, nullable=True, location='json')
parser.add_argument('config_from', type=str, required=False, nullable=True, location='json')
args = parser.parse_args()
model_provider_service = ModelProviderService()
model_load_balancing_service = ModelLoadBalancingService()
try:
model_provider_service.save_model_credentials(
if ('load_balancing' in args and args['load_balancing'] and
'enabled' in args['load_balancing'] and args['load_balancing']['enabled']):
if 'configs' not in args['load_balancing']:
raise ValueError('invalid load balancing configs')
# save load balancing configs
model_load_balancing_service.update_load_balancing_configs(
tenant_id=tenant_id,
provider=provider,
model=args['model'],
model_type=args['model_type'],
credentials=args['credentials']
configs=args['load_balancing']['configs']
)
except CredentialsValidateFailedError as ex:
raise ValueError(str(ex))
# enable load balancing
model_load_balancing_service.enable_model_load_balancing(
tenant_id=tenant_id,
provider=provider,
model=args['model'],
model_type=args['model_type']
)
else:
# disable load balancing
model_load_balancing_service.disable_model_load_balancing(
tenant_id=tenant_id,
provider=provider,
model=args['model'],
model_type=args['model_type']
)
if args.get('config_from', '') != 'predefined-model':
model_provider_service = ModelProviderService()
try:
model_provider_service.save_model_credentials(
tenant_id=tenant_id,
provider=provider,
model=args['model'],
model_type=args['model_type'],
credentials=args['credentials']
)
except CredentialsValidateFailedError as ex:
raise ValueError(str(ex))
return {'result': 'success'}, 200
@@ -170,11 +206,73 @@ class ModelProviderModelCredentialApi(Resource):
model=args['model']
)
model_load_balancing_service = ModelLoadBalancingService()
is_load_balancing_enabled, load_balancing_configs = model_load_balancing_service.get_load_balancing_configs(
tenant_id=tenant_id,
provider=provider,
model=args['model'],
model_type=args['model_type']
)
return {
"credentials": credentials
"credentials": credentials,
"load_balancing": {
"enabled": is_load_balancing_enabled,
"configs": load_balancing_configs
}
}
class ModelProviderModelEnableApi(Resource):
@setup_required
@login_required
@account_initialization_required
def patch(self, provider: str):
tenant_id = current_user.current_tenant_id
parser = reqparse.RequestParser()
parser.add_argument('model', type=str, required=True, nullable=False, location='json')
parser.add_argument('model_type', type=str, required=True, nullable=False,
choices=[mt.value for mt in ModelType], location='json')
args = parser.parse_args()
model_provider_service = ModelProviderService()
model_provider_service.enable_model(
tenant_id=tenant_id,
provider=provider,
model=args['model'],
model_type=args['model_type']
)
return {'result': 'success'}
class ModelProviderModelDisableApi(Resource):
@setup_required
@login_required
@account_initialization_required
def patch(self, provider: str):
tenant_id = current_user.current_tenant_id
parser = reqparse.RequestParser()
parser.add_argument('model', type=str, required=True, nullable=False, location='json')
parser.add_argument('model_type', type=str, required=True, nullable=False,
choices=[mt.value for mt in ModelType], location='json')
args = parser.parse_args()
model_provider_service = ModelProviderService()
model_provider_service.disable_model(
tenant_id=tenant_id,
provider=provider,
model=args['model'],
model_type=args['model_type']
)
return {'result': 'success'}
class ModelProviderModelValidateApi(Resource):
@setup_required
@@ -259,6 +357,10 @@ class ModelProviderAvailableModelApi(Resource):
api.add_resource(ModelProviderModelApi, '/workspaces/current/model-providers/<string:provider>/models')
api.add_resource(ModelProviderModelEnableApi, '/workspaces/current/model-providers/<string:provider>/models/enable',
endpoint='model-provider-model-enable')
api.add_resource(ModelProviderModelDisableApi, '/workspaces/current/model-providers/<string:provider>/models/disable',
endpoint='model-provider-model-disable')
api.add_resource(ModelProviderModelCredentialApi,
'/workspaces/current/model-providers/<string:provider>/models/credentials')
api.add_resource(ModelProviderModelValidateApi,