make logging not use f-str, change others to f-str (#22882)

author Asuka Minato
date 2025-07-25 11:32:48 +09:00
committed by GitHub
parent 570aee5fe6
commit a189d293f8
164 changed files with 557 additions and 563 deletions
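
With %-style placeholders, logging calls defer string interpolation to the logging framework: the message is only built if the record is actually emitted, and the constant template is easier to group and deduplicate. F-strings and .format(), by contrast, always build the string before the call. A minimal sketch of the distinction this commit applies, using only the standard library (the document id value is hypothetical):

import logging

logger = logging.getLogger(__name__)
doc_id = "abc123"  # hypothetical value for illustration

# Eager: the f-string is built even when WARNING records are filtered out.
logger.warning(f"Failed to index document {doc_id}")

# Lazy: logging fills in %s only when the record is actually emitted.
logger.warning("Failed to index document %s", doc_id)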

@@ -605,8 +605,9 @@ class DatasetService:
except ProviderTokenNotInitError:
# If we can't get the embedding model, preserve existing settings
logging.warning(
f"Failed to initialize embedding model {data['embedding_model_provider']}/{data['embedding_model']}, "
f"preserving existing settings"
"Failed to initialize embedding model %s/%s, preserving existing settings",
data["embedding_model_provider"],
data["embedding_model"],
)
if dataset.embedding_model_provider and dataset.embedding_model:
filtered_data["embedding_model_provider"] = dataset.embedding_model_provider
@@ -649,11 +650,11 @@ class DatasetService:
@staticmethod
def check_dataset_permission(dataset, user):
if dataset.tenant_id != user.current_tenant_id:
logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}")
logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
raise NoPermissionError("You do not have permission to access this dataset.")
if user.current_role != TenantAccountRole.OWNER:
if dataset.permission == DatasetPermissionEnum.ONLY_ME and dataset.created_by != user.id:
logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}")
logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
raise NoPermissionError("You do not have permission to access this dataset.")
if dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM:
# For partial team permission, user needs explicit permission or be the creator
@@ -662,7 +663,7 @@ class DatasetService:
db.session.query(DatasetPermission).filter_by(dataset_id=dataset.id, account_id=user.id).first()
)
if not user_permission:
logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}")
logging.debug("User %s does not have permission to access dataset %s", user.id, dataset.id)
raise NoPermissionError("You do not have permission to access this dataset.")
@staticmethod
@@ -1000,7 +1001,7 @@ class DocumentService:
db.session.add(document)
db.session.commit()
# set document paused flag
indexing_cache_key = "document_{}_is_paused".format(document.id)
indexing_cache_key = f"document_{document.id}_is_paused"
redis_client.setnx(indexing_cache_key, "True")
@staticmethod
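
Outside of logging there is no deferred formatting to preserve, so the same commit moves str.format() calls such as the cache-key line above to f-strings, which build the identical string but read more directly. A minimal sketch with a hypothetical document id:

document_id = "abc123"  # hypothetical value for illustration

# Both forms build the same Redis key; only the spelling changes.
key_old = "document_{}_is_paused".format(document_id)
key_new = f"document_{document_id}_is_paused"
assert key_old == key_new
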
@@ -1015,7 +1016,7 @@ class DocumentService:
db.session.add(document)
db.session.commit()
# delete paused flag
indexing_cache_key = "document_{}_is_paused".format(document.id)
indexing_cache_key = f"document_{document.id}_is_paused"
redis_client.delete(indexing_cache_key)
# trigger async task
recover_document_indexing_task.delay(document.dataset_id, document.id)
@@ -1024,7 +1025,7 @@ class DocumentService:
def retry_document(dataset_id: str, documents: list[Document]):
for document in documents:
# add retry flag
- retry_indexing_cache_key = "document_{}_is_retried".format(document.id)
+ retry_indexing_cache_key = f"document_{document.id}_is_retried"
cache_result = redis_client.get(retry_indexing_cache_key)
if cache_result is not None:
raise ValueError("Document is being retried, please try again later")
@@ -1041,7 +1042,7 @@ class DocumentService:
@staticmethod
def sync_website_document(dataset_id: str, document: Document):
# add sync flag
- sync_indexing_cache_key = "document_{}_is_sync".format(document.id)
+ sync_indexing_cache_key = f"document_{document.id}_is_sync"
cache_result = redis_client.get(sync_indexing_cache_key)
if cache_result is not None:
raise ValueError("Document is being synced, please try again later")
@@ -1174,12 +1175,13 @@ class DocumentService:
)
else:
logging.warning(
f"Invalid process rule mode: {process_rule.mode}, can not find dataset process rule"
"Invalid process rule mode: %s, can not find dataset process rule",
process_rule.mode,
)
return
db.session.add(dataset_process_rule)
db.session.commit()
lock_name = "add_document_lock_dataset_id_{}".format(dataset.id)
lock_name = f"add_document_lock_dataset_id_{dataset.id}"
with redis_client.lock(lock_name, timeout=600):
position = DocumentService.get_documents_position(dataset.id)
document_ids = []
@@ -1862,7 +1864,7 @@ class DocumentService:
task_func.delay(*task_args)
except Exception as e:
# Log the error but do not rollback the transaction
logging.exception(f"Error executing async task for document {update_info['document'].id}")
logging.exception("Error executing async task for document %s", update_info["document"].id)
# don't raise the error immediately, but capture it for later
propagation_error = e
try:
@@ -1873,7 +1875,7 @@ class DocumentService:
redis_client.setex(indexing_cache_key, 600, 1)
except Exception as e:
# Log the error but do not rollback the transaction
logging.exception(f"Error setting cache for document {update_info['document'].id}")
logging.exception("Error setting cache for document %s", update_info["document"].id)
# Raise any propagation error after all updates
if propagation_error:
raise propagation_error
@@ -2001,7 +2003,7 @@ class SegmentService:
)
# calc embedding use tokens
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content])[0]
lock_name = "add_segment_lock_document_id_{}".format(document.id)
lock_name = f"add_segment_lock_document_id_{document.id}"
with redis_client.lock(lock_name, timeout=600):
max_position = (
db.session.query(func.max(DocumentSegment.position))
@@ -2048,7 +2050,7 @@ class SegmentService:
@classmethod
def multi_create_segment(cls, segments: list, document: Document, dataset: Dataset):
lock_name = "multi_add_segment_lock_document_id_{}".format(document.id)
lock_name = f"multi_add_segment_lock_document_id_{document.id}"
increment_word_count = 0
with redis_client.lock(lock_name, timeout=600):
embedding_model = None
@@ -2130,7 +2132,7 @@ class SegmentService:
@classmethod
def update_segment(cls, args: SegmentUpdateArgs, segment: DocumentSegment, document: Document, dataset: Dataset):
indexing_cache_key = "segment_{}_indexing".format(segment.id)
indexing_cache_key = f"segment_{segment.id}_indexing"
cache_result = redis_client.get(indexing_cache_key)
if cache_result is not None:
raise ValueError("Segment is indexing, please try again later")
@@ -2300,7 +2302,7 @@ class SegmentService:
@classmethod
def delete_segment(cls, segment: DocumentSegment, document: Document, dataset: Dataset):
indexing_cache_key = "segment_{}_delete_indexing".format(segment.id)
indexing_cache_key = f"segment_{segment.id}_delete_indexing"
cache_result = redis_client.get(indexing_cache_key)
if cache_result is not None:
raise ValueError("Segment is deleting.")
@@ -2352,7 +2354,7 @@ class SegmentService:
return
real_deal_segmment_ids = []
for segment in segments:
indexing_cache_key = "segment_{}_indexing".format(segment.id)
indexing_cache_key = f"segment_{segment.id}_indexing"
cache_result = redis_client.get(indexing_cache_key)
if cache_result is not None:
continue
@@ -2379,7 +2381,7 @@ class SegmentService:
return
real_deal_segmment_ids = []
for segment in segments:
indexing_cache_key = "segment_{}_indexing".format(segment.id)
indexing_cache_key = f"segment_{segment.id}_indexing"
cache_result = redis_client.get(indexing_cache_key)
if cache_result is not None:
continue
@@ -2398,7 +2400,7 @@ class SegmentService:
def create_child_chunk(
cls, content: str, segment: DocumentSegment, document: Document, dataset: Dataset
) -> ChildChunk:
lock_name = "add_child_lock_{}".format(segment.id)
lock_name = f"add_child_lock_{segment.id}"
with redis_client.lock(lock_name, timeout=20):
index_node_id = str(uuid.uuid4())
index_node_hash = helper.generate_text_hash(content)