fix overlap and splitter optimization (#2742)

Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
Jyong
2024-03-07 18:25:49 +08:00
committed by GitHub
parent b163545771
commit 8ba38e8e74
2 changed files with 4 additions and 4 deletions

View File

@@ -52,7 +52,7 @@ class BaseIndexProcessor(ABC):
character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder(
chunk_size=segmentation["max_tokens"],
-chunk_overlap=0,
+chunk_overlap=segmentation.get('chunk_overlap', 0),
fixed_separator=separator,
separators=["\n\n", "", ".", " ", ""],
embedding_model_instance=embedding_model_instance
@@ -61,7 +61,7 @@ class BaseIndexProcessor(ABC):
# Automatic segmentation
character_splitter = EnhanceRecursiveCharacterTextSplitter.from_encoder(
chunk_size=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['max_tokens'],
-chunk_overlap=0,
+chunk_overlap=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['chunk_overlap'],
separators=["\n\n", "", ".", " ", ""],
embedding_model_instance=embedding_model_instance
)