Support knowledge metadata filter (#15982)

This commit is contained in:
Jyong
2025-03-18 16:42:19 +08:00
committed by GitHub
parent b65f2eb55f
commit abeaea4f79
48 changed files with 2502 additions and 574 deletions

View File

@@ -41,6 +41,7 @@ class RetrievalService:
reranking_model: Optional[dict] = None,
reranking_mode: str = "reranking_model",
weights: Optional[dict] = None,
document_ids_filter: Optional[list[str]] = None,
):
if not query:
return []
@@ -64,6 +65,7 @@ class RetrievalService:
top_k=top_k,
all_documents=all_documents,
exceptions=exceptions,
document_ids_filter=document_ids_filter,
)
)
if RetrievalMethod.is_support_semantic_search(retrieval_method):
@@ -79,6 +81,7 @@ class RetrievalService:
all_documents=all_documents,
retrieval_method=retrieval_method,
exceptions=exceptions,
document_ids_filter=document_ids_filter,
)
)
if RetrievalMethod.is_support_fulltext_search(retrieval_method):
@@ -130,7 +133,14 @@ class RetrievalService:
@classmethod
def keyword_search(
cls, flask_app: Flask, dataset_id: str, query: str, top_k: int, all_documents: list, exceptions: list
cls,
flask_app: Flask,
dataset_id: str,
query: str,
top_k: int,
all_documents: list,
exceptions: list,
document_ids_filter: Optional[list[str]] = None,
):
with flask_app.app_context():
try:
@@ -139,7 +149,10 @@ class RetrievalService:
raise ValueError("dataset not found")
keyword = Keyword(dataset=dataset)
documents = keyword.search(cls.escape_query_for_search(query), top_k=top_k)
documents = keyword.search(
cls.escape_query_for_search(query), top_k=top_k, document_ids_filter=document_ids_filter
)
all_documents.extend(documents)
except Exception as e:
exceptions.append(str(e))
@@ -156,6 +169,7 @@ class RetrievalService:
all_documents: list,
retrieval_method: str,
exceptions: list,
document_ids_filter: Optional[list[str]] = None,
):
with flask_app.app_context():
try:
@@ -170,6 +184,7 @@ class RetrievalService:
top_k=top_k,
score_threshold=score_threshold,
filter={"group_id": [dataset.id]},
document_ids_filter=document_ids_filter,
)
if documents: