Chore: clean some # type: ignore (#25157)
@@ -270,7 +270,9 @@ class IndexingRunner:
                 tenant_id=tenant_id,
                 model_type=ModelType.TEXT_EMBEDDING,
             )
-        preview_texts = []  # type: ignore
+        # keep separate, avoid union-list ambiguity
+        preview_texts: list[PreviewDetail] = []
+        qa_preview_texts: list[QAPreviewDetail] = []
 
         total_segments = 0
         index_type = doc_form
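The core of the cleanup is in this first hunk: the untyped `preview_texts = []` previously held both `PreviewDetail` and `QAPreviewDetail` objects, which mypy can only model as a union list, hence the old `# type: ignore`. A minimal sketch of the before/after, using stand-in dataclasses whose fields are inferred from the call sites in this diff (the real class definitions are not part of the change):

```python
from dataclasses import dataclass


@dataclass
class PreviewDetail:
    content: str
    child_chunks: list[str] | None = None


@dataclass
class QAPreviewDetail:
    question: str
    answer: str


# Before: `preview_texts = []` gives mypy no element type, and appending
# both detail kinds would force list[PreviewDetail | QAPreviewDetail].
# After: one explicitly typed, homogeneous list per element type.
preview_texts: list[PreviewDetail] = []
qa_preview_texts: list[QAPreviewDetail] = []

preview_texts.append(PreviewDetail(content="chunk text"))
qa_preview_texts.append(QAPreviewDetail(question="Q?", answer="A."))
```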
@@ -293,14 +295,14 @@ class IndexingRunner:
         for document in documents:
             if len(preview_texts) < 10:
                 if doc_form and doc_form == "qa_model":
-                    preview_detail = QAPreviewDetail(
+                    qa_detail = QAPreviewDetail(
                         question=document.page_content, answer=document.metadata.get("answer") or ""
                     )
-                    preview_texts.append(preview_detail)
+                    qa_preview_texts.append(qa_detail)
                 else:
-                    preview_detail = PreviewDetail(content=document.page_content)  # type: ignore
+                    preview_detail = PreviewDetail(content=document.page_content)
                     if document.children:
-                        preview_detail.child_chunks = [child.page_content for child in document.children]  # type: ignore
+                        preview_detail.child_chunks = [child.page_content for child in document.children]
                     preview_texts.append(preview_detail)
 
         # delete image files and related db records
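The rename in this hunk (`preview_detail` to `qa_detail` on the QA branch) addresses a second, related problem: rebinding one variable to two incompatible types across branches is an error under default mypy settings, which is what the removed ignores were suppressing. A distilled repro with hypothetical classes, unrelated to the Dify models:

```python
class QADetail: ...
class Detail: ...


def build(is_qa: bool) -> None:
    if is_qa:
        item = QADetail()
    else:
        # mypy error: Incompatible types in assignment
        # (expression has type "Detail", variable has type "QADetail")
        item = Detail()
```

Giving each branch its own name, each bound exactly once to one concrete type, keeps every assignment and every `.append()` monomorphic.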
@@ -321,8 +323,8 @@ class IndexingRunner:
                 db.session.delete(image_file)
 
         if doc_form and doc_form == "qa_model":
-            return IndexingEstimate(total_segments=total_segments * 20, qa_preview=preview_texts, preview=[])
-        return IndexingEstimate(total_segments=total_segments, preview=preview_texts)  # type: ignore
+            return IndexingEstimate(total_segments=total_segments * 20, qa_preview=qa_preview_texts, preview=[])
+        return IndexingEstimate(total_segments=total_segments, preview=preview_texts)
 
     def _extract(
         self, index_processor: BaseIndexProcessor, dataset_document: DatasetDocument, process_rule: dict
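With the preview lists split, each return path now hands a correctly typed list to its own field. A plausible shape for `IndexingEstimate`, reusing the stand-in classes from the first sketch (only the field names are visible in this diff; the defaults are assumptions):

```python
from dataclasses import dataclass, field


@dataclass
class IndexingEstimate:
    total_segments: int
    preview: list[PreviewDetail] = field(default_factory=list)
    qa_preview: list[QAPreviewDetail] | None = None


# Both call sites now type-check without ignores:
IndexingEstimate(total_segments=2 * 20, qa_preview=qa_preview_texts, preview=[])
IndexingEstimate(total_segments=2, preview=preview_texts)
```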
@@ -424,6 +426,7 @@ class IndexingRunner:
         """
         Get the NodeParser object according to the processing rule.
         """
+        character_splitter: TextSplitter
         if processing_rule_mode in ["custom", "hierarchical"]:
             # The user-defined segmentation rule
             max_segmentation_tokens_length = dify_config.INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH
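The one-line addition here, `character_splitter: TextSplitter`, is a bare annotation: it declares the variable's type before the branches, so each branch may assign a different `TextSplitter` subclass, and the eventual `return character_splitter` (next hunk) no longer needs its ignore. A self-contained sketch with hypothetical splitter classes:

```python
from abc import ABC, abstractmethod


class TextSplitter(ABC):
    @abstractmethod
    def split_text(self, text: str) -> list[str]: ...


class CustomRuleSplitter(TextSplitter):  # hypothetical subclass
    def split_text(self, text: str) -> list[str]:
        return text.split("\n\n")


class DefaultSplitter(TextSplitter):  # hypothetical subclass
    def split_text(self, text: str) -> list[str]:
        return [text]


def get_splitter(processing_rule_mode: str) -> TextSplitter:
    # Bare annotation: no value yet, just the declared type. Each branch is
    # then checked against TextSplitter, and the return needs no ignore.
    character_splitter: TextSplitter
    if processing_rule_mode in ["custom", "hierarchical"]:
        character_splitter = CustomRuleSplitter()
    else:
        character_splitter = DefaultSplitter()
    return character_splitter
```

Without the bare annotation, mypy would infer `CustomRuleSplitter` from the first assignment and flag the second, the same failure mode as in the rename repro above.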
@@ -450,7 +453,7 @@ class IndexingRunner:
                 embedding_model_instance=embedding_model_instance,
             )
 
-        return character_splitter  # type: ignore
+        return character_splitter
 
     def _split_to_documents_for_estimate(
         self, text_docs: list[Document], splitter: TextSplitter, processing_rule: DatasetProcessRule
@@ -36,7 +36,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
         if not process_rule.get("rules"):
             raise ValueError("No rules found in process rule.")
         rules = Rule(**process_rule.get("rules"))
-        all_documents = []  # type: ignore
+        all_documents: list[Document] = []
         if rules.parent_mode == ParentMode.PARAGRAPH:
             # Split the text documents into nodes.
             if not rules.segmentation: