Update the query logic for cleaning the embedding cache (#6483)

This commit is contained in:
Jyong
2024-07-20 01:29:25 +08:00
committed by GitHub
parent 27e08a8e2e
commit 1e0e573165
4 changed files with 46 additions and 6 deletions

View File

@@ -2,6 +2,7 @@ import datetime
import time
import click
from sqlalchemy import text
from werkzeug.exceptions import NotFound
import app
@@ -18,12 +19,18 @@ def clean_embedding_cache_task():
thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
while True:
try:
embeddings = db.session.query(Embedding).filter(Embedding.created_at < thirty_days_ago) \
embedding_ids = db.session.query(Embedding.id).filter(Embedding.created_at < thirty_days_ago) \
.order_by(Embedding.created_at.desc()).limit(100).all()
embedding_ids = [embedding_id[0] for embedding_id in embedding_ids]
except NotFound:
break
for embedding in embeddings:
db.session.delete(embedding)
db.session.commit()
if embedding_ids:
db.session.execute(text(
"DELETE FROM embeddings WHERE id in :embedding_ids"
), {'embedding_ids': tuple(embedding_ids)})
db.session.commit()
else:
break
end_at = time.perf_counter()
click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green'))