chore: optimize SQL queries that perform partial full table scans (#24786)
This commit is contained in:
@@ -13,12 +13,39 @@ from core.entities.provider_entities import QuotaUnit, SystemConfiguration
|
||||
from core.plugin.entities.plugin import ModelProviderID
|
||||
from events.message_event import message_was_created
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client, redis_fallback
|
||||
from libs import datetime_utils
|
||||
from models.model import Message
|
||||
from models.provider import Provider, ProviderType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Redis cache key prefix for provider last used timestamps
|
||||
_PROVIDER_LAST_USED_CACHE_PREFIX = "provider:last_used"
|
||||
# Default TTL for cache entries (10 minutes)
|
||||
_CACHE_TTL_SECONDS = 600
|
||||
# Minimum number of seconds that must elapse between two `last_used` writes
# for the same tenant/provider cache key (5 minutes).
LAST_USED_UPDATE_WINDOW_SECONDS = 60 * 5
|
||||
|
||||
|
||||
def _get_provider_cache_key(tenant_id: str, provider_name: str) -> str:
    """Build the Redis key under which a provider's last-update time is cached.

    The key has the shape ``<prefix>:<tenant_id>:<provider_name>``, where the
    prefix is ``_PROVIDER_LAST_USED_CACHE_PREFIX``.
    """
    return "{}:{}:{}".format(_PROVIDER_LAST_USED_CACHE_PREFIX, tenant_id, provider_name)
|
||||
|
||||
|
||||
@redis_fallback(default_return=None)
def _get_last_update_timestamp(cache_key: str) -> Optional[datetime]:
    """Read the cached last-update time for ``cache_key`` from Redis.

    Returns ``None`` when the key is missing or holds an empty value.
    Otherwise the stored value is decoded as UTF-8, parsed as a float of
    epoch seconds and converted with ``datetime.fromtimestamp`` (no ``tz``),
    which yields a naive datetime in the process's local timezone.

    NOTE(review): presumably ``redis_fallback`` makes this return ``None``
    when Redis is unavailable -- confirm against the decorator.
    """
    raw_value = redis_client.get(cache_key)
    if not raw_value:
        return None

    # The value is expected to be bytes holding the text form of a float.
    epoch_seconds = float(raw_value.decode("utf-8"))
    return datetime.fromtimestamp(epoch_seconds)
|
||||
|
||||
|
||||
@redis_fallback()
def _set_last_update_timestamp(cache_key: str, timestamp: datetime) -> None:
    """Store ``timestamp`` in Redis under ``cache_key`` with an expiry.

    The value written is the string form of ``timestamp.timestamp()`` (epoch
    seconds as a float), and the entry expires after ``_CACHE_TTL_SECONDS``.
    """
    epoch_seconds = timestamp.timestamp()
    redis_client.setex(cache_key, _CACHE_TTL_SECONDS, str(epoch_seconds))
|
||||
|
||||
|
||||
class _ProviderUpdateFilters(BaseModel):
|
||||
"""Filters for identifying Provider records to update."""
|
||||
@@ -139,7 +166,7 @@ def handle(sender: Message, **kwargs):
|
||||
provider_name,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
# Log failure with timing and context
|
||||
duration = time_module.perf_counter() - start_time
|
||||
|
||||
@@ -215,8 +242,23 @@ def _execute_provider_updates(updates_to_perform: list[_ProviderUpdateOperation]
|
||||
|
||||
# Prepare values dict for SQLAlchemy update
|
||||
update_values = {}
|
||||
# Updating `last_used` was removed for performance reasons.
|
||||
# ref: https://github.com/langgenius/dify/issues/24526
|
||||
|
||||
# NOTE: For frequently used providers under high load, this implementation may experience
|
||||
# race conditions or update contention despite the time-window optimization:
|
||||
# 1. Multiple concurrent requests might check the same cache key simultaneously
|
||||
# 2. Redis cache operations are not atomic with database updates
|
||||
# 3. Heavy providers could still face database lock contention during peak usage
|
||||
# The current implementation is acceptable for most scenarios, but future optimization
|
||||
# considerations could include: batched updates, or async processing.
|
||||
if values.last_used is not None:
|
||||
cache_key = _get_provider_cache_key(filters.tenant_id, filters.provider_name)
|
||||
now = datetime_utils.naive_utc_now()
|
||||
last_update = _get_last_update_timestamp(cache_key)
|
||||
|
||||
if last_update is None or (now - last_update).total_seconds() > LAST_USED_UPDATE_WINDOW_SECONDS:
|
||||
update_values["last_used"] = values.last_used
|
||||
_set_last_update_timestamp(cache_key, now)
|
||||
|
||||
if values.quota_used is not None:
|
||||
update_values["quota_used"] = values.quota_used
|
||||
# Skip the current update operation if no updates are required.
|
||||
|
Reference in New Issue
Block a user