Feat/support parent child chunk (#12092)
This commit is contained in:
@@ -10,7 +10,7 @@ from configs import dify_config
|
||||
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.dataset import Dataset, DatasetQuery, Document
|
||||
from models.dataset import Dataset, DatasetAutoDisableLog, DatasetQuery, Document
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
|
||||
@@ -75,6 +75,23 @@ def clean_unused_datasets_task():
|
||||
)
|
||||
if not dataset_query or len(dataset_query) == 0:
|
||||
try:
|
||||
# add auto disable log
|
||||
documents = (
|
||||
db.session.query(Document)
|
||||
.filter(
|
||||
Document.dataset_id == dataset.id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
)
|
||||
.all()
|
||||
)
|
||||
for document in documents:
|
||||
dataset_auto_disable_log = DatasetAutoDisableLog(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
)
|
||||
db.session.add(dataset_auto_disable_log)
|
||||
# remove index
|
||||
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
|
||||
index_processor.clean(dataset, None)
|
||||
@@ -151,6 +168,23 @@ def clean_unused_datasets_task():
|
||||
else:
|
||||
plan = plan_cache.decode()
|
||||
if plan == "sandbox":
|
||||
# add auto disable log
|
||||
documents = (
|
||||
db.session.query(Document)
|
||||
.filter(
|
||||
Document.dataset_id == dataset.id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
)
|
||||
.all()
|
||||
)
|
||||
for document in documents:
|
||||
dataset_auto_disable_log = DatasetAutoDisableLog(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
)
|
||||
db.session.add(dataset_auto_disable_log)
|
||||
# remove index
|
||||
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
|
||||
index_processor.clean(dataset, None)
|
||||
|
66
api/schedule/mail_clean_document_notify_task.py
Normal file
66
api/schedule/mail_clean_document_notify_task.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import logging
import time
from collections import defaultdict

import click
from celery import shared_task
from flask import render_template

from extensions.ext_mail import mail
from models.account import Account, Tenant, TenantAccountJoin
from models.dataset import Dataset, DatasetAutoDisableLog
|
||||
|
||||
|
||||
@shared_task(queue="mail")
def send_document_clean_notify_task():
    """
    Async task: email each workspace owner a summary of documents that were
    auto-disabled, grouped by knowledge base (dataset).

    Reads all DatasetAutoDisableLog rows with notified == False, groups them
    by tenant, resolves the tenant owner's account, and sends one mail per
    tenant listing how many documents were disabled in each dataset.

    Usage: send_document_clean_notify_task.delay()
    """
    # Mail subsystem may be disabled in this deployment; nothing to do then.
    if not mail.is_inited():
        return

    logging.info(click.style("Start send document clean notify mail", fg="green"))
    start_at = time.perf_counter()

    # send document clean notify mail
    try:
        dataset_auto_disable_logs = DatasetAutoDisableLog.query.filter(DatasetAutoDisableLog.notified == False).all()
        # NOTE(review): these logs are filtered on notified == False but are
        # never marked notified here, so the same rows will be re-sent on every
        # run — TODO confirm where the flag is supposed to be flipped.

        # group by tenant_id
        # BUG FIX: plain dict indexing raised KeyError on the first append;
        # defaultdict(list) gives each new tenant an empty list.
        dataset_auto_disable_logs_map: dict[str, list] = defaultdict(list)
        for dataset_auto_disable_log in dataset_auto_disable_logs:
            dataset_auto_disable_logs_map[dataset_auto_disable_log.tenant_id].append(dataset_auto_disable_log)

        for tenant_id, tenant_dataset_auto_disable_logs in dataset_auto_disable_logs_map.items():
            knowledge_details = []
            tenant = Tenant.query.filter(Tenant.id == tenant_id).first()
            if not tenant:
                continue
            current_owner_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, role="owner").first()
            # BUG FIX: a tenant without an owner row previously crashed with
            # AttributeError on current_owner_join.account_id.
            if not current_owner_join:
                continue
            account = Account.query.filter(Account.id == current_owner_join.account_id).first()
            if not account:
                continue

            # group this tenant's disabled document ids by dataset
            # BUG FIX: same KeyError-on-first-append problem as above.
            dataset_auto_dataset_map: dict[str, list] = defaultdict(list)
            for dataset_auto_disable_log in tenant_dataset_auto_disable_logs:
                dataset_auto_dataset_map[dataset_auto_disable_log.dataset_id].append(
                    dataset_auto_disable_log.document_id
                )

            for dataset_id, document_ids in dataset_auto_dataset_map.items():
                dataset = Dataset.query.filter(Dataset.id == dataset_id).first()
                if dataset:
                    document_count = len(document_ids)
                    knowledge_details.append(f"<li>Knowledge base {dataset.name}: {document_count} documents</li>")

            # BUG FIX: knowledge_details was computed but never handed to the
            # template; pass it so the mail body can render the summary.
            # (Jinja ignores unused context vars, so this is safe either way.)
            html_content = render_template(
                "clean_document_job_mail_template-US.html",
                knowledge_details=knowledge_details,
            )
            # BUG FIX: `to` was an undefined name (NameError) and the subject
            # was copy-pasted from the workspace-invite mail; send to the
            # workspace owner with a subject matching this notification.
            mail.send(
                to=account.email,
                subject="Some documents in your knowledge base were disabled",
                html=html_content,
            )

        end_at = time.perf_counter()
        logging.info(
            click.style("Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green")
        )
    except Exception:
        # BUG FIX: the old message referenced the invite-member task and the
        # undefined `to` variable, which would itself raise inside the handler.
        logging.exception("Send document clean notify mail failed")
|
Reference in New Issue
Block a user