Feat/add retriever rerank (#1560)
Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
@@ -3,7 +3,7 @@ import pickle
|
||||
from json import JSONDecodeError
|
||||
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||
|
||||
from extensions.ext_database import db
|
||||
from models.account import Account
|
||||
@@ -15,6 +15,7 @@ class Dataset(db.Model):
|
||||
__table_args__ = (
|
||||
db.PrimaryKeyConstraint('id', name='dataset_pkey'),
|
||||
db.Index('dataset_tenant_idx', 'tenant_id'),
|
||||
db.Index('retrieval_model_idx', "retrieval_model", postgresql_using='gin')
|
||||
)
|
||||
|
||||
INDEXING_TECHNIQUE_LIST = ['high_quality', 'economy']
|
||||
@@ -39,7 +40,7 @@ class Dataset(db.Model):
|
||||
embedding_model = db.Column(db.String(255), nullable=True)
|
||||
embedding_model_provider = db.Column(db.String(255), nullable=True)
|
||||
collection_binding_id = db.Column(UUID, nullable=True)
|
||||
|
||||
retrieval_model = db.Column(JSONB, nullable=True)
|
||||
|
||||
@property
|
||||
def dataset_keyword_table(self):
|
||||
@@ -93,6 +94,20 @@ class Dataset(db.Model):
|
||||
return Document.query.with_entities(func.coalesce(func.sum(Document.word_count))) \
|
||||
.filter(Document.dataset_id == self.id).scalar()
|
||||
|
||||
@property
def retrieval_model_dict(self):
    """Return the retrieval settings for this dataset.

    The stored ``retrieval_model`` value is returned unchanged when it is
    truthy. Otherwise a freshly built default configuration is returned.
    """
    stored = self.retrieval_model
    if stored:
        return stored

    # Fallback used when nothing (or an empty value) is stored.
    return {
        'search_method': 'semantic_search',
        'reranking_enable': False,
        'reranking_model': {
            'reranking_provider_name': '',
            'reranking_model_name': ''
        },
        'top_k': 2,
        'score_threshold_enable': False
    }
|
||||
|
||||
|
||||
class DatasetProcessRule(db.Model):
|
||||
__tablename__ = 'dataset_process_rules'
|
||||
@@ -120,7 +135,7 @@ class DatasetProcessRule(db.Model):
|
||||
],
|
||||
'segmentation': {
|
||||
'delimiter': '\n',
|
||||
'max_tokens': 1000
|
||||
'max_tokens': 512
|
||||
}
|
||||
}
|
||||
|
||||
@@ -462,4 +477,3 @@ class DatasetCollectionBinding(db.Model):
|
||||
model_name = db.Column(db.String(40), nullable=False)
|
||||
collection_name = db.Column(db.String(64), nullable=False)
|
||||
created_at = db.Column(db.DateTime, nullable=False, server_default=db.text('CURRENT_TIMESTAMP(0)'))
|
||||
|
||||
|
@@ -160,7 +160,13 @@ class AppModelConfig(db.Model):
|
||||
|
||||
@property
def dataset_configs_dict(self) -> dict:
    """Return the parsed dataset configuration for this app model config.

    ``dataset_configs`` is decoded from JSON when it is set. If it is
    unset/empty, or the decoded value has no ``'retrieval_model'`` key,
    the default ``{'retrieval_model': 'single'}`` is returned instead.

    Returns:
        The decoded configuration, or the single-retrieval default.
    """
    # Built per call so callers can mutate the result safely.
    default_configs = {'retrieval_model': 'single'}

    if not self.dataset_configs:
        return default_configs

    dataset_configs = json.loads(self.dataset_configs)
    # Stored configs without a 'retrieval_model' key are replaced
    # wholesale by the default rather than merged with it.
    if 'retrieval_model' not in dataset_configs:
        return default_configs
    return dataset_configs
|
||||
|
||||
@property
|
||||
def file_upload_dict(self) -> dict:
|
||||
|
Reference in New Issue
Block a user