diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index c0c0d21a2..fea4d0edf 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -97,6 +97,7 @@ class RetrievalService: all_documents=all_documents, retrieval_method=retrieval_method, exceptions=exceptions, + document_ids_filter=document_ids_filter, ) ) concurrent.futures.wait(futures, timeout=30, return_when=concurrent.futures.ALL_COMPLETED) @@ -222,6 +223,7 @@ class RetrievalService: all_documents: list, retrieval_method: str, exceptions: list, + document_ids_filter: Optional[list[str]] = None, ): with flask_app.app_context(): try: @@ -231,7 +233,9 @@ class RetrievalService: vector_processor = Vector(dataset=dataset) - documents = vector_processor.search_by_full_text(cls.escape_query_for_search(query), top_k=top_k) + documents = vector_processor.search_by_full_text( + cls.escape_query_for_search(query), top_k=top_k, document_ids_filter=document_ids_filter + ) if documents: if ( reranking_model diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index f56b2db8a..2fe46197f 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -610,7 +610,11 @@ class DatasetRetrieval: if dataset.indexing_technique == "economy": # use keyword table query documents = RetrievalService.retrieve( - retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=top_k + retrieval_method="keyword_search", + dataset_id=dataset.id, + query=query, + top_k=top_k, + document_ids_filter=document_ids_filter, ) if documents: all_documents.extend(documents)