Feat/queue monitor (#20647)

This commit is contained in:
Dongyu Li
2025-06-04 19:56:34 +08:00
committed by GitHub
parent 4f066454d0
commit 92614765ff
8 changed files with 237 additions and 7 deletions

View File

@@ -491,3 +491,10 @@ OTEL_METRIC_EXPORT_TIMEOUT=30000
# Prevent Clickjacking
ALLOW_EMBED=false
# Dataset queue monitor configuration
QUEUE_MONITOR_THRESHOLD=200
# Alert recipient email addresses. Separate multiple addresses with commas, e.g. test1@dify.ai,test2@dify.ai
QUEUE_MONITOR_ALERT_EMAILS=
# Monitor interval in minutes, default is 30 minutes
QUEUE_MONITOR_INTERVAL=30

View File

@@ -2,7 +2,7 @@ import os
from typing import Any, Literal, Optional
from urllib.parse import parse_qsl, quote_plus
from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
from pydantic import Field, NonNegativeFloat, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
from pydantic_settings import BaseSettings
from .cache.redis_config import RedisConfig
@@ -256,6 +256,25 @@ class InternalTestConfig(BaseSettings):
)
class DatasetQueueMonitorConfig(BaseSettings):
    """
    Settings for the dataset queue monitor.

    Declares the queue-length threshold, the comma-separated list of alert
    recipients and the monitoring interval (in minutes).
    """

    # Queue length threshold for the monitor; defaults to 200.
    QUEUE_MONITOR_THRESHOLD: Optional[NonNegativeInt] = Field(
        default=200,
        description="Threshold for dataset queue monitor",
    )

    # Comma-separated alert recipients; unset by default.
    QUEUE_MONITOR_ALERT_EMAILS: Optional[str] = Field(
        default=None,
        description="Emails for dataset queue monitor alert, separated by commas",
    )

    # Monitoring interval in minutes; defaults to 30.
    QUEUE_MONITOR_INTERVAL: Optional[NonNegativeFloat] = Field(
        default=30,
        description="Interval for dataset queue monitor in minutes",
    )
class MiddlewareConfig(
# place the configs in alphabet order
CeleryConfig,
@@ -303,5 +322,6 @@ class MiddlewareConfig(
BaiduVectorDBConfig,
OpenGaussConfig,
TableStoreConfig,
DatasetQueueMonitorConfig,
):
pass

View File

@@ -70,6 +70,7 @@ def init_app(app: DifyApp) -> Celery:
"schedule.update_tidb_serverless_status_task",
"schedule.clean_messages",
"schedule.mail_clean_document_notify_task",
"schedule.queue_monitor_task",
]
day = dify_config.CELERY_BEAT_SCHEDULER_TIME
beat_schedule = {
@@ -98,6 +99,12 @@ def init_app(app: DifyApp) -> Celery:
"task": "schedule.mail_clean_document_notify_task.mail_clean_document_notify_task",
"schedule": crontab(minute="0", hour="10", day_of_week="1"),
},
"datasets-queue-monitor": {
"task": "schedule.queue_monitor_task.queue_monitor_task",
"schedule": timedelta(
minutes=dify_config.QUEUE_MONITOR_INTERVAL if dify_config.QUEUE_MONITOR_INTERVAL else 30
),
},
}
celery_app.conf.update(beat_schedule=beat_schedule, imports=imports)

View File

@@ -0,0 +1,62 @@
import logging
from datetime import datetime
from urllib.parse import unquote, urlparse

import click
from flask import render_template
from redis import Redis

import app
from configs import dify_config
from extensions.ext_database import db
from extensions.ext_mail import mail
# Create a dedicated Redis connection (using the same configuration as Celery).
# The connection parameters are taken from the Celery broker URL; missing parts
# fall back to localhost:6379 and database "1".
celery_broker_url = dify_config.CELERY_BROKER_URL
parsed = urlparse(celery_broker_url)
host = parsed.hostname or "localhost"
port = parsed.port or 6379
# urlparse() leaves the userinfo part percent-encoded, so credentials containing
# escaped characters (e.g. "%40" for "@") must be decoded before they are used.
username = unquote(parsed.username) if parsed.username else None
password = unquote(parsed.password) if parsed.password else None
redis_db = parsed.path.strip("/") or "1"  # type: ignore
celery_redis = Redis(host=host, port=port, username=username, password=password, db=redis_db)
@app.celery.task(queue="monitor")
def queue_monitor_task():
    """
    Check the length of the "dataset" queue and email an alert when it is too long.

    Reads the length of the Redis list named ``dataset`` through ``celery_redis``
    and compares it with ``dify_config.QUEUE_MONITOR_THRESHOLD``. When the length
    reaches the threshold, a warning is logged and the alert email template is
    sent to every address listed in ``dify_config.QUEUE_MONITOR_ALERT_EMAILS``.

    Errors are logged instead of being raised. A failure to send to one
    recipient does not prevent sending to the others. The database session is
    closed on exit if it is still active.
    """
    queue_name = "dataset"
    threshold = dify_config.QUEUE_MONITOR_THRESHOLD

    try:
        queue_length = celery_redis.llen(queue_name)
        logging.info(click.style(f"Start monitor {queue_name}", fg="green"))
        logging.info(click.style(f"Queue length: {queue_length}", fg="green"))

        # The threshold setting is Optional; comparing an int against None would
        # raise TypeError, so an unset threshold means there is nothing to check.
        if threshold is None or queue_length < threshold:
            return

        warning_msg = f"Queue {queue_name} task count exceeded the limit.: {queue_length}/{threshold}"
        logging.warning(click.style(warning_msg, fg="red"))

        # Tolerate spaces around commas and stray/trailing commas in the setting.
        alert_emails = dify_config.QUEUE_MONITOR_ALERT_EMAILS or ""
        recipients = [address.strip() for address in alert_emails.split(",") if address.strip()]
        if not recipients:
            return

        # The message body is identical for every recipient, so render it once.
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        html_content = render_template(
            "queue_monitor_alert_email_template_en-US.html",
            queue_name=queue_name,
            queue_length=queue_length,
            threshold=threshold,
            alert_time=current_time,
        )

        for to in recipients:
            try:
                mail.send(
                    to=to, subject="Alert: Dataset Queue pending tasks exceeded the limit", html=html_content
                )
            except Exception:
                # Best effort: keep notifying the remaining recipients.
                logging.exception(click.style("Exception occurred during sending email", fg="red"))
    except Exception:
        logging.exception(click.style("Exception occurred during queue monitoring", fg="red"))
    finally:
        if db.session.is_active:
            db.session.close()

View File

@@ -5,7 +5,7 @@ import uuid
import click
from celery import shared_task # type: ignore
from sqlalchemy import func, select
from sqlalchemy import func
from sqlalchemy.orm import Session
from core.model_manager import ModelManager
@@ -69,11 +69,6 @@ def batch_create_segment_to_index_task(
model=dataset.embedding_model,
)
word_count_change = 0
segments_to_insert: list[str] = []
max_position_stmt = select(func.max(DocumentSegment.position)).where(
DocumentSegment.document_id == dataset_document.id
)
word_count_change = 0
if embedding_model:
tokens_list = embedding_model.get_text_embedding_num_tokens(
texts=[segment["content"] for segment in content]

View File

@@ -0,0 +1,129 @@
{#
  Queue monitoring alert email (en-US).

  Template variables used below:
    queue_name   - name of the monitored queue
    queue_length - number of pending tasks shown in the alert
    threshold    - threshold value shown next to the pending task count
    alert_time   - timestamp shown under "Additional Information"

  The "-" before the closing delimiter trims the whitespace that follows this
  comment, so the rendered output still starts with the doctype.
-#}
<!DOCTYPE html>
<html>
<head>
<style>
body {
font-family: 'Arial', sans-serif;
line-height: 16pt;
color: #101828;
background-color: #e9ebf0;
margin: 0;
padding: 0;
}
.container {
width: 600px;
min-height: 605px;
margin: 40px auto;
padding: 36px 48px;
background-color: #fcfcfd;
border-radius: 16px;
border: 1px solid #ffffff;
box-shadow: 0 2px 4px -2px rgba(9, 9, 11, 0.08);
}
.header {
margin-bottom: 24px;
}
.header img {
max-width: 100px;
height: auto;
}
.title {
font-weight: 600;
font-size: 24px;
line-height: 28.8px;
}
.description {
font-size: 13px;
line-height: 16px;
color: #676f83;
margin-top: 12px;
}
.alert-content {
padding: 16px 32px;
text-align: center;
border-radius: 16px;
background-color: #fef0f0;
margin: 16px auto;
border: 1px solid #fda29b;
}
.alert-title {
line-height: 24px;
font-weight: 700;
font-size: 18px;
color: #d92d20;
}
.alert-detail {
line-height: 20px;
font-size: 14px;
margin-top: 8px;
}
.typography {
letter-spacing: -0.07px;
font-weight: 400;
font-style: normal;
font-size: 14px;
line-height: 20px;
color: #354052;
margin-top: 12px;
margin-bottom: 12px;
}
.typography p{
margin: 0 auto;
}
.typography-title {
color: #101828;
font-size: 14px;
font-style: normal;
font-weight: 600;
line-height: 20px;
margin-top: 12px;
margin-bottom: 4px;
}
.tip-list{
margin: 0;
padding-left: 10px;
}
</style>
</head>
<body>
<div class="container">
<div class="header">
<img src="https://assets.dify.ai/images/logo.png" alt="Dify Logo" />
</div>
<p class="title">Queue Monitoring Alert</p>
<p class="typography">Our system has detected an abnormal queue status that requires your attention:</p>
<div class="alert-content">
<div class="alert-title">Queue Task Alert</div>
<div class="alert-detail">
Queue "{{queue_name}}" has {{queue_length}} pending tasks (Threshold: {{threshold}})
</div>
</div>
<div class="typography">
<p style="margin-bottom:4px">Recommended actions:</p>
<p>1. Check the queue processing status in the system dashboard</p>
<p>2. Verify if there are any processing bottlenecks</p>
<p>3. Consider scaling up workers if needed</p>
</div>
<p class="typography-title">Additional Information:</p>
<ul class="typography tip-list">
<li>Alert triggered at: {{alert_time}}</li>
</ul>
</div>
</body>
</html>

View File

@@ -1111,3 +1111,10 @@ OTEL_METRIC_EXPORT_TIMEOUT=30000
# Prevent Clickjacking
ALLOW_EMBED=false
# Dataset queue monitor configuration
QUEUE_MONITOR_THRESHOLD=200
# Alert recipient email addresses. Separate multiple addresses with commas, e.g. test1@dify.ai,test2@dify.ai
QUEUE_MONITOR_ALERT_EMAILS=
# Monitor interval in minutes, default is 30 minutes
QUEUE_MONITOR_INTERVAL=30

View File

@@ -501,6 +501,9 @@ x-shared-env: &shared-api-worker-env
OTEL_BATCH_EXPORT_TIMEOUT: ${OTEL_BATCH_EXPORT_TIMEOUT:-10000}
OTEL_METRIC_EXPORT_TIMEOUT: ${OTEL_METRIC_EXPORT_TIMEOUT:-30000}
ALLOW_EMBED: ${ALLOW_EMBED:-false}
QUEUE_MONITOR_THRESHOLD: ${QUEUE_MONITOR_THRESHOLD:-200}
QUEUE_MONITOR_ALERT_EMAILS: ${QUEUE_MONITOR_ALERT_EMAILS:-}
QUEUE_MONITOR_INTERVAL: ${QUEUE_MONITOR_INTERVAL:-30}
services:
# API service