Feat/queue monitor (#20647)
This commit is contained in:
@@ -491,3 +491,10 @@ OTEL_METRIC_EXPORT_TIMEOUT=30000
|
||||
|
||||
# Prevent Clickjacking
|
||||
ALLOW_EMBED=false
|
||||
|
||||
# Dataset queue monitor configuration
|
||||
QUEUE_MONITOR_THRESHOLD=200
|
||||
# You can configure multiple ones, separated by commas. eg: test1@dify.ai,test2@dify.ai
|
||||
QUEUE_MONITOR_ALERT_EMAILS=
|
||||
# Monitor interval in minutes, default is 30 minutes
|
||||
QUEUE_MONITOR_INTERVAL=30
|
||||
|
@@ -2,7 +2,7 @@ import os
|
||||
from typing import Any, Literal, Optional
|
||||
from urllib.parse import parse_qsl, quote_plus
|
||||
|
||||
from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
|
||||
from pydantic import Field, NonNegativeFloat, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
from .cache.redis_config import RedisConfig
|
||||
@@ -256,6 +256,25 @@ class InternalTestConfig(BaseSettings):
|
||||
)
|
||||
|
||||
|
||||
class DatasetQueueMonitorConfig(BaseSettings):
|
||||
"""
|
||||
Configuration settings for Dataset Queue Monitor
|
||||
"""
|
||||
|
||||
QUEUE_MONITOR_THRESHOLD: Optional[NonNegativeInt] = Field(
|
||||
description="Threshold for dataset queue monitor",
|
||||
default=200,
|
||||
)
|
||||
QUEUE_MONITOR_ALERT_EMAILS: Optional[str] = Field(
|
||||
description="Emails for dataset queue monitor alert, separated by commas",
|
||||
default=None,
|
||||
)
|
||||
QUEUE_MONITOR_INTERVAL: Optional[NonNegativeFloat] = Field(
|
||||
description="Interval for dataset queue monitor in minutes",
|
||||
default=30,
|
||||
)
|
||||
|
||||
|
||||
class MiddlewareConfig(
|
||||
# place the configs in alphabet order
|
||||
CeleryConfig,
|
||||
@@ -303,5 +322,6 @@ class MiddlewareConfig(
|
||||
BaiduVectorDBConfig,
|
||||
OpenGaussConfig,
|
||||
TableStoreConfig,
|
||||
DatasetQueueMonitorConfig,
|
||||
):
|
||||
pass
|
||||
|
@@ -70,6 +70,7 @@ def init_app(app: DifyApp) -> Celery:
|
||||
"schedule.update_tidb_serverless_status_task",
|
||||
"schedule.clean_messages",
|
||||
"schedule.mail_clean_document_notify_task",
|
||||
"schedule.queue_monitor_task",
|
||||
]
|
||||
day = dify_config.CELERY_BEAT_SCHEDULER_TIME
|
||||
beat_schedule = {
|
||||
@@ -98,6 +99,12 @@ def init_app(app: DifyApp) -> Celery:
|
||||
"task": "schedule.mail_clean_document_notify_task.mail_clean_document_notify_task",
|
||||
"schedule": crontab(minute="0", hour="10", day_of_week="1"),
|
||||
},
|
||||
"datasets-queue-monitor": {
|
||||
"task": "schedule.queue_monitor_task.queue_monitor_task",
|
||||
"schedule": timedelta(
|
||||
minutes=dify_config.QUEUE_MONITOR_INTERVAL if dify_config.QUEUE_MONITOR_INTERVAL else 30
|
||||
),
|
||||
},
|
||||
}
|
||||
celery_app.conf.update(beat_schedule=beat_schedule, imports=imports)
|
||||
|
||||
|
62
api/schedule/queue_monitor_task.py
Normal file
62
api/schedule/queue_monitor_task.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import click
|
||||
from flask import render_template
|
||||
from redis import Redis
|
||||
|
||||
import app
|
||||
from configs import dify_config
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_mail import mail
|
||||
|
||||
# Create a dedicated Redis connection (using the same configuration as Celery)
|
||||
celery_broker_url = dify_config.CELERY_BROKER_URL
|
||||
|
||||
parsed = urlparse(celery_broker_url)
|
||||
host = parsed.hostname or "localhost"
|
||||
port = parsed.port or 6379
|
||||
password = parsed.password or None
|
||||
redis_db = parsed.path.strip("/") or "1" # type: ignore
|
||||
|
||||
celery_redis = Redis(host=host, port=port, password=password, db=redis_db)
|
||||
|
||||
|
||||
@app.celery.task(queue="monitor")
|
||||
def queue_monitor_task():
|
||||
queue_name = "dataset"
|
||||
threshold = dify_config.QUEUE_MONITOR_THRESHOLD
|
||||
|
||||
try:
|
||||
queue_length = celery_redis.llen(f"{queue_name}")
|
||||
logging.info(click.style(f"Start monitor {queue_name}", fg="green"))
|
||||
logging.info(click.style(f"Queue length: {queue_length}", fg="green"))
|
||||
|
||||
if queue_length >= threshold:
|
||||
warning_msg = f"Queue {queue_name} task count exceeded the limit.: {queue_length}/{threshold}"
|
||||
logging.warning(click.style(warning_msg, fg="red"))
|
||||
alter_emails = dify_config.QUEUE_MONITOR_ALERT_EMAILS
|
||||
if alter_emails:
|
||||
to_list = alter_emails.split(",")
|
||||
for to in to_list:
|
||||
try:
|
||||
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
html_content = render_template(
|
||||
"queue_monitor_alert_email_template_en-US.html",
|
||||
queue_name=queue_name,
|
||||
queue_length=queue_length,
|
||||
threshold=threshold,
|
||||
alert_time=current_time,
|
||||
)
|
||||
mail.send(
|
||||
to=to, subject="Alert: Dataset Queue pending tasks exceeded the limit", html=html_content
|
||||
)
|
||||
except Exception as e:
|
||||
logging.exception(click.style("Exception occurred during sending email", fg="red"))
|
||||
|
||||
except Exception as e:
|
||||
logging.exception(click.style("Exception occurred during queue monitoring", fg="red"))
|
||||
finally:
|
||||
if db.session.is_active:
|
||||
db.session.close()
|
@@ -5,7 +5,7 @@ import uuid
|
||||
|
||||
import click
|
||||
from celery import shared_task # type: ignore
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from core.model_manager import ModelManager
|
||||
@@ -69,11 +69,6 @@ def batch_create_segment_to_index_task(
|
||||
model=dataset.embedding_model,
|
||||
)
|
||||
word_count_change = 0
|
||||
segments_to_insert: list[str] = []
|
||||
max_position_stmt = select(func.max(DocumentSegment.position)).where(
|
||||
DocumentSegment.document_id == dataset_document.id
|
||||
)
|
||||
word_count_change = 0
|
||||
if embedding_model:
|
||||
tokens_list = embedding_model.get_text_embedding_num_tokens(
|
||||
texts=[segment["content"] for segment in content]
|
||||
|
129
api/templates/queue_monitor_alert_email_template_en-US.html
Normal file
129
api/templates/queue_monitor_alert_email_template_en-US.html
Normal file
@@ -0,0 +1,129 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<style>
|
||||
body {
|
||||
font-family: 'Arial', sans-serif;
|
||||
line-height: 16pt;
|
||||
color: #101828;
|
||||
background-color: #e9ebf0;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.container {
|
||||
width: 600px;
|
||||
min-height: 605px;
|
||||
margin: 40px auto;
|
||||
padding: 36px 48px;
|
||||
background-color: #fcfcfd;
|
||||
border-radius: 16px;
|
||||
border: 1px solid #ffffff;
|
||||
box-shadow: 0 2px 4px -2px rgba(9, 9, 11, 0.08);
|
||||
}
|
||||
|
||||
.header {
|
||||
margin-bottom: 24px;
|
||||
}
|
||||
|
||||
.header img {
|
||||
max-width: 100px;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.title {
|
||||
font-weight: 600;
|
||||
font-size: 24px;
|
||||
line-height: 28.8px;
|
||||
}
|
||||
|
||||
.description {
|
||||
font-size: 13px;
|
||||
line-height: 16px;
|
||||
color: #676f83;
|
||||
margin-top: 12px;
|
||||
}
|
||||
|
||||
.alert-content {
|
||||
padding: 16px 32px;
|
||||
text-align: center;
|
||||
border-radius: 16px;
|
||||
background-color: #fef0f0;
|
||||
margin: 16px auto;
|
||||
border: 1px solid #fda29b;
|
||||
}
|
||||
|
||||
.alert-title {
|
||||
line-height: 24px;
|
||||
font-weight: 700;
|
||||
font-size: 18px;
|
||||
color: #d92d20;
|
||||
}
|
||||
|
||||
.alert-detail {
|
||||
line-height: 20px;
|
||||
font-size: 14px;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.typography {
|
||||
letter-spacing: -0.07px;
|
||||
font-weight: 400;
|
||||
font-style: normal;
|
||||
font-size: 14px;
|
||||
line-height: 20px;
|
||||
color: #354052;
|
||||
margin-top: 12px;
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
.typography p{
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.typography-title {
|
||||
color: #101828;
|
||||
font-size: 14px;
|
||||
font-style: normal;
|
||||
font-weight: 600;
|
||||
line-height: 20px;
|
||||
margin-top: 12px;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.tip-list{
|
||||
margin: 0;
|
||||
padding-left: 10px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="header">
|
||||
<img src="https://assets.dify.ai/images/logo.png" alt="Dify Logo" />
|
||||
</div>
|
||||
<p class="title">Queue Monitoring Alert</p>
|
||||
<p class="typography">Our system has detected an abnormal queue status that requires your attention:</p>
|
||||
|
||||
<div class="alert-content">
|
||||
<div class="alert-title">Queue Task Alert</div>
|
||||
<div class="alert-detail">
|
||||
Queue "{{queue_name}}" has {{queue_length}} pending tasks (Threshold: {{threshold}})
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="typography">
|
||||
<p style="margin-bottom:4px">Recommended actions:</p>
|
||||
<p>1. Check the queue processing status in the system dashboard</p>
|
||||
<p>2. Verify if there are any processing bottlenecks</p>
|
||||
<p>3. Consider scaling up workers if needed</p>
|
||||
</div>
|
||||
|
||||
<p class="typography-title">Additional Information:</p>
|
||||
<ul class="typography tip-list">
|
||||
<li>Alert triggered at: {{alert_time}}</li>
|
||||
</ul>
|
||||
</div>
|
||||
</body>
|
||||
|
||||
</html>
|
@@ -1111,3 +1111,10 @@ OTEL_METRIC_EXPORT_TIMEOUT=30000
|
||||
|
||||
# Prevent Clickjacking
|
||||
ALLOW_EMBED=false
|
||||
|
||||
# Dataset queue monitor configuration
|
||||
QUEUE_MONITOR_THRESHOLD=200
|
||||
# You can configure multiple ones, separated by commas. eg: test1@dify.ai,test2@dify.ai
|
||||
QUEUE_MONITOR_ALERT_EMAILS=
|
||||
# Monitor interval in minutes, default is 30 minutes
|
||||
QUEUE_MONITOR_INTERVAL=30
|
||||
|
@@ -501,6 +501,9 @@ x-shared-env: &shared-api-worker-env
|
||||
OTEL_BATCH_EXPORT_TIMEOUT: ${OTEL_BATCH_EXPORT_TIMEOUT:-10000}
|
||||
OTEL_METRIC_EXPORT_TIMEOUT: ${OTEL_METRIC_EXPORT_TIMEOUT:-30000}
|
||||
ALLOW_EMBED: ${ALLOW_EMBED:-false}
|
||||
QUEUE_MONITOR_THRESHOLD: ${QUEUE_MONITOR_THRESHOLD:-200}
|
||||
QUEUE_MONITOR_ALERT_EMAILS: ${QUEUE_MONITOR_ALERT_EMAILS:-}
|
||||
QUEUE_MONITOR_INTERVAL: ${QUEUE_MONITOR_INTERVAL:-30}
|
||||
|
||||
services:
|
||||
# API service
|
||||
|
Reference in New Issue
Block a user