Allow configuring the maximum segmentation token length for RAG documents via an environment variable (#4375)

This commit is contained in:
Rain Chen
2024-05-20 13:20:27 +08:00
committed by GitHub
parent b5204111da
commit c255a20d7c
5 changed files with 21 additions and 4 deletions

View File

@@ -411,8 +411,9 @@ class IndexingRunner:
# The user-defined segmentation rule
rules = json.loads(processing_rule.rules)
segmentation = rules["segmentation"]
if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > 1000:
raise ValueError("Custom segment length should be between 50 and 1000.")
max_segmentation_tokens_length = int(current_app.config['INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH'])
if segmentation["max_tokens"] < 50 or segmentation["max_tokens"] > max_segmentation_tokens_length:
raise ValueError(f"Custom segment length should be between 50 and {max_segmentation_tokens_length}.")
separator = segmentation["separator"]
if separator: