feat: mypy for all type check (#10921)

Author: yihong
Date: 2024-12-24 18:38:51 +08:00
Committed by: GitHub
Parent: c91e8b1737
Commit: 56e15d09a9
584 changed files with 3975 additions and 2826 deletions


@@ -4,7 +4,7 @@ import time
 import uuid
 import click
-from celery import shared_task
+from celery import shared_task  # type: ignore
 from sqlalchemy import func
 from core.indexing_runner import IndexingRunner
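The bare `# type: ignore` on the celery import is the usual workaround when a dependency ships without complete type information, so mypy cannot resolve the imported names. A minimal sketch of a narrower alternative, assuming a mypy version that reports per-category error codes (older releases use the broader `import` code instead):

    # Silence only the "untyped import" diagnostic instead of every error on the line.
    from celery import shared_task  # type: ignore[import-untyped]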
@@ -58,12 +58,13 @@ def batch_create_segment_to_index_task(
             model=dataset.embedding_model,
         )
         word_count_change = 0
+        segments_to_insert: list[str] = []  # Explicitly type hint the list as List[str]
         for segment in content:
-            content = segment["content"]
+            content_str = segment["content"]
             doc_id = str(uuid.uuid4())
-            segment_hash = helper.generate_text_hash(content)
+            segment_hash = helper.generate_text_hash(content_str)
             # calc embedding use tokens
-            tokens = embedding_model.get_text_embedding_num_tokens(texts=[content]) if embedding_model else 0
+            tokens = embedding_model.get_text_embedding_num_tokens(texts=[content_str]) if embedding_model else 0
             max_position = (
                 db.session.query(func.max(DocumentSegment.position))
                 .filter(DocumentSegment.document_id == dataset_document.id)
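Two patterns in this hunk carry the bulk of the type-check fix. Re-binding `content` inside `for segment in content:` would give one name both the list type of the argument and the `str` type of a single field, which mypy rejects as an incompatible assignment; renaming the inner variable to `content_str` keeps each name at a single type. Similarly, an empty list needs an explicit annotation for mypy to know its element type. A standalone sketch of the same pattern (names and types here are illustrative, not taken from the file):

    import hashlib

    def collect_hashes(content: list[dict[str, str]]) -> list[str]:
        hashes: list[str] = []                # annotate the empty list so mypy can infer its element type
        for segment in content:
            content_str = segment["content"]  # distinct name; re-using "content" would re-bind the
                                              # list-typed parameter to a str, which mypy flags
            hashes.append(hashlib.sha256(content_str.encode("utf-8")).hexdigest())
        return hashes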
@@ -90,6 +91,7 @@ def batch_create_segment_to_index_task(
             word_count_change += segment_document.word_count
             db.session.add(segment_document)
             document_segments.append(segment_document)
+            segments_to_insert.append(str(segment))  # Cast to string if needed
         # update document word count
         dataset_document.word_count += word_count_change
         db.session.add(dataset_document)
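With the per-file annotations in place, the commit's goal ("mypy for all type check") is to run the checker over the whole tree. A minimal sketch of driving that check from Python via mypy's documented `api.run` helper; the target path is a placeholder, not this project's actual layout, and real setups usually invoke mypy from CI or a lint script instead:

    from mypy import api

    # api.run mirrors the command line and returns (report, errors, exit status).
    stdout, stderr, exit_status = api.run(["tasks/"])
    print(stdout, end="")
    raise SystemExit(exit_status)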