feat: mypy for all type check (#10921)
This commit is contained in:
@@ -4,7 +4,7 @@ import time
|
||||
import uuid
|
||||
|
||||
import click
|
||||
from celery import shared_task
|
||||
from celery import shared_task # type: ignore
|
||||
from sqlalchemy import func
|
||||
|
||||
from core.indexing_runner import IndexingRunner
|
||||
@@ -58,12 +58,13 @@ def batch_create_segment_to_index_task(
|
||||
model=dataset.embedding_model,
|
||||
)
|
||||
word_count_change = 0
|
||||
segments_to_insert: list[str] = [] # Explicitly type hint the list as List[str]
|
||||
for segment in content:
|
||||
content = segment["content"]
|
||||
content_str = segment["content"]
|
||||
doc_id = str(uuid.uuid4())
|
||||
segment_hash = helper.generate_text_hash(content)
|
||||
segment_hash = helper.generate_text_hash(content_str)
|
||||
# calc embedding use tokens
|
||||
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content]) if embedding_model else 0
|
||||
tokens = embedding_model.get_text_embedding_num_tokens(texts=[content_str]) if embedding_model else 0
|
||||
max_position = (
|
||||
db.session.query(func.max(DocumentSegment.position))
|
||||
.filter(DocumentSegment.document_id == dataset_document.id)
|
||||
@@ -90,6 +91,7 @@ def batch_create_segment_to_index_task(
|
||||
word_count_change += segment_document.word_count
|
||||
db.session.add(segment_document)
|
||||
document_segments.append(segment_document)
|
||||
segments_to_insert.append(str(segment)) # Cast to string if needed
|
||||
# update document word count
|
||||
dataset_document.word_count += word_count_change
|
||||
db.session.add(dataset_document)
|
||||
|
Reference in New Issue
Block a user