Sync INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH between API and Web (#11230)

This commit is contained in:
Hiroshi Fujita
2024-12-02 16:29:25 +09:00
committed by GitHub
parent f8c966c39c
commit 1d8385f7ac
29 changed files with 51 additions and 40 deletions

View File

@@ -122,7 +122,8 @@ const StepTwo = ({
// Stores the segment separator after passing it through `escape`;
// a falsy (e.g. empty) value falls back to DEFAULT_SEGMENT_IDENTIFIER.
const setSegmentIdentifier = useCallback((value: string) => {
  const nextIdentifier = value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER
  doSetSegmentIdentifier(nextIdentifier)
}, [])
const [max, setMax] = useState(4000) // default chunk length
const [maxChunkLength, setMaxChunkLength] = useState(4000) // default chunk length
const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000)
const [overlap, setOverlap] = useState(50)
const [rules, setRules] = useState<PreProcessingRule[]>([])
const [defaultConfig, setDefaultConfig] = useState<Rules>()
@@ -196,7 +197,7 @@ const StepTwo = ({
const resetRules = () => {
if (defaultConfig) {
setSegmentIdentifier(defaultConfig.segmentation.separator)
setMax(defaultConfig.segmentation.max_tokens)
setMaxChunkLength(defaultConfig.segmentation.max_tokens)
setOverlap(defaultConfig.segmentation.chunk_overlap)
setRules(defaultConfig.pre_processing_rules)
}
@@ -212,8 +213,8 @@ const StepTwo = ({
}
const confirmChangeCustomConfig = () => {
if (segmentationType === SegmentType.CUSTOM && max > 4000) {
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
return
}
setCustomFileIndexingEstimate(null)
@@ -234,7 +235,7 @@ const StepTwo = ({
pre_processing_rules: rules,
segmentation: {
separator: unescape(segmentIdentifier),
max_tokens: max,
max_tokens: maxChunkLength,
chunk_overlap: overlap,
},
}
@@ -339,12 +340,12 @@ const StepTwo = ({
)
const getCreationParams = () => {
let params
if (segmentationType === SegmentType.CUSTOM && overlap > max) {
if (segmentationType === SegmentType.CUSTOM && overlap > maxChunkLength) {
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') })
return
}
if (segmentationType === SegmentType.CUSTOM && max > 4000) {
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
return
}
if (isSetting) {
@@ -415,7 +416,8 @@ const StepTwo = ({
const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' })
const separator = res.rules.segmentation.separator
setSegmentIdentifier(separator)
setMax(res.rules.segmentation.max_tokens)
setMaxChunkLength(res.rules.segmentation.max_tokens)
setLimitMaxChunkLength(res.limits.indexing_max_segmentation_tokens_length)
setOverlap(res.rules.segmentation.chunk_overlap)
setRules(res.rules.pre_processing_rules)
setDefaultConfig(res.rules)
@@ -432,7 +434,7 @@ const StepTwo = ({
const max = rules.segmentation.max_tokens
const overlap = rules.segmentation.chunk_overlap
setSegmentIdentifier(separator)
setMax(max)
setMaxChunkLength(max)
setOverlap(overlap)
setRules(rules.pre_processing_rules)
setDefaultConfig(rules)
@@ -670,10 +672,10 @@ const StepTwo = ({
type="number"
className='h-9'
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
value={max}
max={4000}
value={maxChunkLength}
max={limitMaxChunkLength}
min={1}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
onChange={e => setMaxChunkLength(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
</div>
</div>