fix:score threshold (#24897)
This commit is contained in:
@@ -256,7 +256,7 @@ class AnalyticdbVectorOpenAPI:
|
|||||||
response = self._client.query_collection_data(request)
|
response = self._client.query_collection_data(request)
|
||||||
documents = []
|
documents = []
|
||||||
for match in response.body.matches.match:
|
for match in response.body.matches.match:
|
||||||
if match.score > score_threshold:
|
if match.score >= score_threshold:
|
||||||
metadata = json.loads(match.metadata.get("metadata_"))
|
metadata = json.loads(match.metadata.get("metadata_"))
|
||||||
metadata["score"] = match.score
|
metadata["score"] = match.score
|
||||||
doc = Document(
|
doc = Document(
|
||||||
@@ -293,7 +293,7 @@ class AnalyticdbVectorOpenAPI:
|
|||||||
response = self._client.query_collection_data(request)
|
response = self._client.query_collection_data(request)
|
||||||
documents = []
|
documents = []
|
||||||
for match in response.body.matches.match:
|
for match in response.body.matches.match:
|
||||||
if match.score > score_threshold:
|
if match.score >= score_threshold:
|
||||||
metadata = json.loads(match.metadata.get("metadata_"))
|
metadata = json.loads(match.metadata.get("metadata_"))
|
||||||
metadata["score"] = match.score
|
metadata["score"] = match.score
|
||||||
doc = Document(
|
doc = Document(
|
||||||
|
@@ -229,7 +229,7 @@ class AnalyticdbVectorBySql:
|
|||||||
documents = []
|
documents = []
|
||||||
for record in cur:
|
for record in cur:
|
||||||
id, vector, score, page_content, metadata = record
|
id, vector, score, page_content, metadata = record
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
metadata["score"] = score
|
metadata["score"] = score
|
||||||
doc = Document(
|
doc = Document(
|
||||||
page_content=page_content,
|
page_content=page_content,
|
||||||
|
@@ -157,7 +157,7 @@ class BaiduVector(BaseVector):
|
|||||||
if meta is not None:
|
if meta is not None:
|
||||||
meta = json.loads(meta)
|
meta = json.loads(meta)
|
||||||
score = row.get("score", 0.0)
|
score = row.get("score", 0.0)
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
meta["score"] = score
|
meta["score"] = score
|
||||||
doc = Document(page_content=row_data.get(self.field_text), metadata=meta)
|
doc = Document(page_content=row_data.get(self.field_text), metadata=meta)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
|
@@ -120,7 +120,7 @@ class ChromaVector(BaseVector):
|
|||||||
distance = distances[index]
|
distance = distances[index]
|
||||||
metadata = dict(metadatas[index])
|
metadata = dict(metadatas[index])
|
||||||
score = 1 - distance
|
score = 1 - distance
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
metadata["score"] = score
|
metadata["score"] = score
|
||||||
doc = Document(
|
doc = Document(
|
||||||
page_content=documents[index],
|
page_content=documents[index],
|
||||||
|
@@ -216,7 +216,7 @@ class ElasticSearchVector(BaseVector):
|
|||||||
docs = []
|
docs = []
|
||||||
for doc, score in docs_and_scores:
|
for doc, score in docs_and_scores:
|
||||||
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
if doc.metadata is not None:
|
if doc.metadata is not None:
|
||||||
doc.metadata["score"] = score
|
doc.metadata["score"] = score
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
|
@@ -127,7 +127,7 @@ class HuaweiCloudVector(BaseVector):
|
|||||||
docs = []
|
docs = []
|
||||||
for doc, score in docs_and_scores:
|
for doc, score in docs_and_scores:
|
||||||
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
if doc.metadata is not None:
|
if doc.metadata is not None:
|
||||||
doc.metadata["score"] = score
|
doc.metadata["score"] = score
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
|
@@ -275,7 +275,7 @@ class LindormVectorStore(BaseVector):
|
|||||||
docs = []
|
docs = []
|
||||||
for doc, score in docs_and_scores:
|
for doc, score in docs_and_scores:
|
||||||
score_threshold = kwargs.get("score_threshold", 0.0) or 0.0
|
score_threshold = kwargs.get("score_threshold", 0.0) or 0.0
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
if doc.metadata is not None:
|
if doc.metadata is not None:
|
||||||
doc.metadata["score"] = score
|
doc.metadata["score"] = score
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
|
@@ -194,7 +194,7 @@ class OpenGauss(BaseVector):
|
|||||||
metadata, text, distance = record
|
metadata, text, distance = record
|
||||||
score = 1 - distance
|
score = 1 - distance
|
||||||
metadata["score"] = score
|
metadata["score"] = score
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
docs.append(Document(page_content=text, metadata=metadata))
|
docs.append(Document(page_content=text, metadata=metadata))
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
@@ -211,7 +211,7 @@ class OpenSearchVector(BaseVector):
|
|||||||
|
|
||||||
metadata["score"] = hit["_score"]
|
metadata["score"] = hit["_score"]
|
||||||
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
||||||
if hit["_score"] > score_threshold:
|
if hit["_score"] >= score_threshold:
|
||||||
doc = Document(page_content=hit["_source"].get(Field.CONTENT_KEY.value), metadata=metadata)
|
doc = Document(page_content=hit["_source"].get(Field.CONTENT_KEY.value), metadata=metadata)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
|
|
||||||
|
@@ -261,7 +261,7 @@ class OracleVector(BaseVector):
|
|||||||
metadata, text, distance = record
|
metadata, text, distance = record
|
||||||
score = 1 - distance
|
score = 1 - distance
|
||||||
metadata["score"] = score
|
metadata["score"] = score
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
docs.append(Document(page_content=text, metadata=metadata))
|
docs.append(Document(page_content=text, metadata=metadata))
|
||||||
conn.close()
|
conn.close()
|
||||||
return docs
|
return docs
|
||||||
|
@@ -202,7 +202,7 @@ class PGVectoRS(BaseVector):
|
|||||||
score = 1 - dis
|
score = 1 - dis
|
||||||
metadata["score"] = score
|
metadata["score"] = score
|
||||||
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
doc = Document(page_content=record.text, metadata=metadata)
|
doc = Document(page_content=record.text, metadata=metadata)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
return docs
|
return docs
|
||||||
|
@@ -195,7 +195,7 @@ class PGVector(BaseVector):
|
|||||||
metadata, text, distance = record
|
metadata, text, distance = record
|
||||||
score = 1 - distance
|
score = 1 - distance
|
||||||
metadata["score"] = score
|
metadata["score"] = score
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
docs.append(Document(page_content=text, metadata=metadata))
|
docs.append(Document(page_content=text, metadata=metadata))
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
@@ -170,7 +170,7 @@ class VastbaseVector(BaseVector):
|
|||||||
metadata, text, distance = record
|
metadata, text, distance = record
|
||||||
score = 1 - distance
|
score = 1 - distance
|
||||||
metadata["score"] = score
|
metadata["score"] = score
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
docs.append(Document(page_content=text, metadata=metadata))
|
docs.append(Document(page_content=text, metadata=metadata))
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
@@ -369,7 +369,7 @@ class QdrantVector(BaseVector):
|
|||||||
continue
|
continue
|
||||||
metadata = result.payload.get(Field.METADATA_KEY.value) or {}
|
metadata = result.payload.get(Field.METADATA_KEY.value) or {}
|
||||||
# duplicate check score threshold
|
# duplicate check score threshold
|
||||||
if result.score > score_threshold:
|
if result.score >= score_threshold:
|
||||||
metadata["score"] = result.score
|
metadata["score"] = result.score
|
||||||
doc = Document(
|
doc = Document(
|
||||||
page_content=result.payload.get(Field.CONTENT_KEY.value, ""),
|
page_content=result.payload.get(Field.CONTENT_KEY.value, ""),
|
||||||
|
@@ -233,7 +233,7 @@ class RelytVector(BaseVector):
|
|||||||
docs = []
|
docs = []
|
||||||
for document, score in results:
|
for document, score in results:
|
||||||
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
||||||
if 1 - score > score_threshold:
|
if 1 - score >= score_threshold:
|
||||||
docs.append(document)
|
docs.append(document)
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
@@ -300,7 +300,7 @@ class TableStoreVector(BaseVector):
|
|||||||
)
|
)
|
||||||
documents = []
|
documents = []
|
||||||
for search_hit in search_response.search_hits:
|
for search_hit in search_response.search_hits:
|
||||||
if search_hit.score > score_threshold:
|
if search_hit.score >= score_threshold:
|
||||||
ots_column_map = {}
|
ots_column_map = {}
|
||||||
for col in search_hit.row[1]:
|
for col in search_hit.row[1]:
|
||||||
ots_column_map[col[0]] = col[1]
|
ots_column_map[col[0]] = col[1]
|
||||||
|
@@ -293,7 +293,7 @@ class TencentVector(BaseVector):
|
|||||||
score = 1 - result.get("score", 0.0)
|
score = 1 - result.get("score", 0.0)
|
||||||
else:
|
else:
|
||||||
score = result.get("score", 0.0)
|
score = result.get("score", 0.0)
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
meta["score"] = score
|
meta["score"] = score
|
||||||
doc = Document(page_content=result.get(self.field_text), metadata=meta)
|
doc = Document(page_content=result.get(self.field_text), metadata=meta)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
|
@@ -351,7 +351,7 @@ class TidbOnQdrantVector(BaseVector):
|
|||||||
metadata = result.payload.get(Field.METADATA_KEY.value) or {}
|
metadata = result.payload.get(Field.METADATA_KEY.value) or {}
|
||||||
# duplicate check score threshold
|
# duplicate check score threshold
|
||||||
score_threshold = kwargs.get("score_threshold") or 0.0
|
score_threshold = kwargs.get("score_threshold") or 0.0
|
||||||
if result.score > score_threshold:
|
if result.score >= score_threshold:
|
||||||
metadata["score"] = result.score
|
metadata["score"] = result.score
|
||||||
doc = Document(
|
doc = Document(
|
||||||
page_content=result.payload.get(Field.CONTENT_KEY.value, ""),
|
page_content=result.payload.get(Field.CONTENT_KEY.value, ""),
|
||||||
|
@@ -110,7 +110,7 @@ class UpstashVector(BaseVector):
|
|||||||
score = record.score
|
score = record.score
|
||||||
if metadata is not None and text is not None:
|
if metadata is not None and text is not None:
|
||||||
metadata["score"] = score
|
metadata["score"] = score
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
docs.append(Document(page_content=text, metadata=metadata))
|
docs.append(Document(page_content=text, metadata=metadata))
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
@@ -192,7 +192,7 @@ class VikingDBVector(BaseVector):
|
|||||||
metadata = result.fields.get(vdb_Field.METADATA_KEY.value)
|
metadata = result.fields.get(vdb_Field.METADATA_KEY.value)
|
||||||
if metadata is not None:
|
if metadata is not None:
|
||||||
metadata = json.loads(metadata)
|
metadata = json.loads(metadata)
|
||||||
if result.score > score_threshold:
|
if result.score >= score_threshold:
|
||||||
metadata["score"] = result.score
|
metadata["score"] = result.score
|
||||||
doc = Document(page_content=result.fields.get(vdb_Field.CONTENT_KEY.value), metadata=metadata)
|
doc = Document(page_content=result.fields.get(vdb_Field.CONTENT_KEY.value), metadata=metadata)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
|
@@ -220,7 +220,7 @@ class WeaviateVector(BaseVector):
|
|||||||
for doc, score in docs_and_scores:
|
for doc, score in docs_and_scores:
|
||||||
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
score_threshold = float(kwargs.get("score_threshold") or 0.0)
|
||||||
# check score threshold
|
# check score threshold
|
||||||
if score > score_threshold:
|
if score >= score_threshold:
|
||||||
if doc.metadata is not None:
|
if doc.metadata is not None:
|
||||||
doc.metadata["score"] = score
|
doc.metadata["score"] = score
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
|
@@ -123,7 +123,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
|
|||||||
for result in results:
|
for result in results:
|
||||||
metadata = result.metadata
|
metadata = result.metadata
|
||||||
metadata["score"] = result.score
|
metadata["score"] = result.score
|
||||||
if result.score > score_threshold:
|
if result.score >= score_threshold:
|
||||||
doc = Document(page_content=result.page_content, metadata=metadata)
|
doc = Document(page_content=result.page_content, metadata=metadata)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
return docs
|
return docs
|
||||||
|
@@ -162,7 +162,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
|
|||||||
for result in results:
|
for result in results:
|
||||||
metadata = result.metadata
|
metadata = result.metadata
|
||||||
metadata["score"] = result.score
|
metadata["score"] = result.score
|
||||||
if result.score > score_threshold:
|
if result.score >= score_threshold:
|
||||||
doc = Document(page_content=result.page_content, metadata=metadata)
|
doc = Document(page_content=result.page_content, metadata=metadata)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
return docs
|
return docs
|
||||||
|
@@ -158,7 +158,7 @@ class QAIndexProcessor(BaseIndexProcessor):
|
|||||||
for result in results:
|
for result in results:
|
||||||
metadata = result.metadata
|
metadata = result.metadata
|
||||||
metadata["score"] = result.score
|
metadata["score"] = result.score
|
||||||
if result.score > score_threshold:
|
if result.score >= score_threshold:
|
||||||
doc = Document(page_content=result.page_content, metadata=metadata)
|
doc = Document(page_content=result.page_content, metadata=metadata)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
return docs
|
return docs
|
||||||
|
Reference in New Issue
Block a user