Feat/delete single dataset retrieval (#6570)

This commit is contained in:
Jyong
2024-07-24 12:50:11 +08:00
committed by GitHub
parent 0fb741f269
commit e4bb943fe5
22 changed files with 651 additions and 115 deletions

View File

@@ -13,13 +13,41 @@ class RerankingModelConfig(BaseModel):
model: str
class VectorSetting(BaseModel):
    """
    Vector half of a weighted-score configuration.

    Holds one numeric weight together with the provider/model pair that
    names an embedding model.
    """
    # Weight for the vector side; presumably balanced against
    # KeywordSetting.keyword_weight -- TODO confirm against the retrieval code.
    vector_weight: float
    # Provider of the embedding model.
    embedding_provider_name: str
    # Name of the embedding model.
    embedding_model_name: str
class KeywordSetting(BaseModel):
    """
    Keyword half of a weighted-score configuration.

    Holds a single numeric weight.
    """
    # Weight for the keyword side; presumably balanced against
    # VectorSetting.vector_weight -- TODO confirm against the retrieval code.
    keyword_weight: float
class WeightedScoreConfig(BaseModel):
    """
    Weighted-score configuration.

    Groups a weight type label with the vector-side and keyword-side
    settings.
    """
    # Free-form label for the weighting scheme; the accepted values are not
    # constrained here.
    weight_type: str
    # Vector weight plus the embedding provider/model names.
    vector_setting: VectorSetting
    # Keyword weight.
    keyword_setting: KeywordSetting
class MultipleRetrievalConfig(BaseModel):
    """
    Multiple Retrieval Config.

    ``reranking_model`` and ``weights`` are both optional. The knowledge
    retrieval node reads ``reranking_model`` only when ``reranking_mode`` is
    ``'reranking_model'`` and ``weights`` only when it is ``'weighted_score'``,
    and passes neither for any other mode. Requiring both at validation time
    would reject a configuration that supplies just the one its mode uses.
    """
    # Number of results to request from retrieval.
    top_k: int
    # Optional score cut-off; None leaves it unset.
    score_threshold: Optional[float] = None
    # Selects which of the two optional sections below is consulted:
    # 'reranking_model' or 'weighted_score'.
    reranking_mode: str = 'reranking_model'
    # Forwarded unchanged to the retrieval call.
    reranking_enable: bool = True
    # Consulted only when reranking_mode == 'reranking_model'.
    reranking_model: Optional[RerankingModelConfig] = None
    # Consulted only when reranking_mode == 'weighted_score'.
    weights: Optional[WeightedScoreConfig] = None
class ModelConfig(BaseModel):

View File

@@ -138,13 +138,38 @@ class KnowledgeRetrievalNode(BaseNode):
planning_strategy=planning_strategy
)
elif node_data.retrieval_mode == DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE.value:
if node_data.multiple_retrieval_config.reranking_mode == 'reranking_model':
reranking_model = {
'reranking_provider_name': node_data.multiple_retrieval_config.reranking_model['provider'],
'reranking_model_name': node_data.multiple_retrieval_config.reranking_model['name']
}
weights = None
elif node_data.multiple_retrieval_config.reranking_mode == 'weighted_score':
reranking_model = None
weights = {
'weight_type': node_data.multiple_retrieval_config.weights.weight_type,
'vector_setting': {
"vector_weight": node_data.multiple_retrieval_config.weights.vector_setting.vector_weight,
"embedding_provider_name": node_data.multiple_retrieval_config.weights.vector_setting.embedding_provider_name,
"embedding_model_name": node_data.multiple_retrieval_config.weights.vector_setting.embedding_model_name,
},
'keyword_setting': {
"keyword_weight": node_data.multiple_retrieval_config.weights.keyword_setting.keyword_weight
}
}
else:
reranking_model = None
weights = None
all_documents = dataset_retrieval.multiple_retrieve(self.app_id, self.tenant_id, self.user_id,
self.user_from.value,
available_datasets, query,
node_data.multiple_retrieval_config.top_k,
node_data.multiple_retrieval_config.score_threshold,
node_data.multiple_retrieval_config.reranking_model.provider,
node_data.multiple_retrieval_config.reranking_model.model)
node_data.multiple_retrieval_config.reranking_mode,
reranking_model,
weights,
node_data.multiple_retrieval_config.reranking_enable,
)
context_list = []
if all_documents: