fix: improve document filtering in full text search (Elasticsearch) (#22683)

This commit is contained in:
uply23333
2025-07-21 15:59:37 +08:00
committed by GitHub
parent c7382150b5
commit ab012fe1a2

View File

@@ -147,10 +147,17 @@ class ElasticSearchVector(BaseVector):
return docs return docs
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
query_str = {"match": {Field.CONTENT_KEY.value: query}} query_str: dict[str, Any] = {"match": {Field.CONTENT_KEY.value: query}}
document_ids_filter = kwargs.get("document_ids_filter") document_ids_filter = kwargs.get("document_ids_filter")
if document_ids_filter: if document_ids_filter:
query_str["filter"] = {"terms": {"metadata.document_id": document_ids_filter}} # type: ignore query_str = {
"bool": {
"must": {"match": {Field.CONTENT_KEY.value: query}},
"filter": {"terms": {"metadata.document_id": document_ids_filter}},
}
}
results = self._client.search(index=self._collection_name, query=query_str, size=kwargs.get("top_k", 4)) results = self._client.search(index=self._collection_name, query=query_str, size=kwargs.get("top_k", 4))
docs = [] docs = []
for hit in results["hits"]["hits"]: for hit in results["hits"]["hits"]: