feat: mypy for all type check (#10921)

This commit is contained in:
yihong
2024-12-24 18:38:51 +08:00
committed by GitHub
parent c91e8b1737
commit 56e15d09a9
584 changed files with 3975 additions and 2826 deletions

View File

@@ -6,6 +6,7 @@ from flask import Flask, current_app
from core.rag.data_post_processor.data_post_processor import DataPostProcessor
from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.models.document import Document
from core.rag.rerank.rerank_type import RerankMode
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
@@ -31,7 +32,7 @@ class RetrievalService:
top_k: int,
score_threshold: Optional[float] = 0.0,
reranking_model: Optional[dict] = None,
reranking_mode: Optional[str] = "reranking_model",
reranking_mode: str = "reranking_model",
weights: Optional[dict] = None,
):
if not query:
@@ -42,15 +43,15 @@ class RetrievalService:
if not dataset or dataset.available_document_count == 0 or dataset.available_segment_count == 0:
return []
all_documents = []
threads = []
exceptions = []
all_documents: list[Document] = []
threads: list[threading.Thread] = []
exceptions: list[str] = []
# retrieval_model source with keyword
if retrieval_method == "keyword_search":
keyword_thread = threading.Thread(
target=RetrievalService.keyword_search,
kwargs={
"flask_app": current_app._get_current_object(),
"flask_app": current_app._get_current_object(), # type: ignore
"dataset_id": dataset_id,
"query": query,
"top_k": top_k,
@@ -65,7 +66,7 @@ class RetrievalService:
embedding_thread = threading.Thread(
target=RetrievalService.embedding_search,
kwargs={
"flask_app": current_app._get_current_object(),
"flask_app": current_app._get_current_object(), # type: ignore
"dataset_id": dataset_id,
"query": query,
"top_k": top_k,
@@ -84,7 +85,7 @@ class RetrievalService:
full_text_index_thread = threading.Thread(
target=RetrievalService.full_text_index_search,
kwargs={
"flask_app": current_app._get_current_object(),
"flask_app": current_app._get_current_object(), # type: ignore
"dataset_id": dataset_id,
"query": query,
"retrieval_method": retrieval_method,
@@ -124,7 +125,7 @@ class RetrievalService:
if not dataset:
return []
all_documents = ExternalDatasetService.fetch_external_knowledge_retrieval(
dataset.tenant_id, dataset_id, query, external_retrieval_model
dataset.tenant_id, dataset_id, query, external_retrieval_model or {}
)
return all_documents
@@ -135,6 +136,8 @@ class RetrievalService:
with flask_app.app_context():
try:
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if not dataset:
raise ValueError("dataset not found")
keyword = Keyword(dataset=dataset)
@@ -159,6 +162,8 @@ class RetrievalService:
with flask_app.app_context():
try:
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if not dataset:
raise ValueError("dataset not found")
vector = Vector(dataset=dataset)
@@ -209,6 +214,8 @@ class RetrievalService:
with flask_app.app_context():
try:
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if not dataset:
raise ValueError("dataset not found")
vector_processor = Vector(
dataset=dataset,