@@ -534,7 +534,7 @@ class IndexingRunner:
|
||||
# chunk nodes by chunk size
|
||||
indexing_start_at = time.perf_counter()
|
||||
tokens = 0
|
||||
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
|
||||
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
|
||||
# create keyword index
|
||||
create_keyword_thread = threading.Thread(
|
||||
target=self._process_keyword_index,
|
||||
@@ -572,7 +572,7 @@ class IndexingRunner:
|
||||
|
||||
for future in futures:
|
||||
tokens += future.result()
|
||||
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
|
||||
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
|
||||
create_keyword_thread.join()
|
||||
indexing_end_at = time.perf_counter()
|
||||
|
||||
|
@@ -76,6 +76,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
|
||||
if dataset.indexing_technique == "high_quality":
|
||||
vector = Vector(dataset)
|
||||
vector.create(documents)
|
||||
with_keywords = False
|
||||
if with_keywords:
|
||||
keywords_list = kwargs.get("keywords_list")
|
||||
keyword = Keyword(dataset)
|
||||
@@ -91,6 +92,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
|
||||
vector.delete_by_ids(node_ids)
|
||||
else:
|
||||
vector.delete()
|
||||
with_keywords = False
|
||||
if with_keywords:
|
||||
keyword = Keyword(dataset)
|
||||
if node_ids:
|
||||
|
@@ -97,16 +97,16 @@ class VectorService:
|
||||
vector = Vector(dataset=dataset)
|
||||
vector.delete_by_ids([segment.index_node_id])
|
||||
vector.add_texts([document], duplicate_check=True)
|
||||
|
||||
# update keyword index
|
||||
keyword = Keyword(dataset)
|
||||
keyword.delete_by_ids([segment.index_node_id])
|
||||
|
||||
# save keyword index
|
||||
if keywords and len(keywords) > 0:
|
||||
keyword.add_texts([document], keywords_list=[keywords])
|
||||
else:
|
||||
keyword.add_texts([document])
|
||||
# update keyword index
|
||||
keyword = Keyword(dataset)
|
||||
keyword.delete_by_ids([segment.index_node_id])
|
||||
|
||||
# save keyword index
|
||||
if keywords and len(keywords) > 0:
|
||||
keyword.add_texts([document], keywords_list=[keywords])
|
||||
else:
|
||||
keyword.add_texts([document])
|
||||
|
||||
@classmethod
|
||||
def generate_child_chunks(
|
||||
|
@@ -1,4 +1,4 @@
|
||||
import React, { type FC, useMemo, useState } from 'react'
|
||||
import React, { type FC, useCallback, useMemo, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import {
|
||||
RiCloseLine,
|
||||
@@ -16,8 +16,10 @@ import { useSegmentListContext } from './index'
|
||||
import { ChunkingMode, type SegmentDetailModel } from '@/models/datasets'
|
||||
import { useEventEmitterContextContext } from '@/context/event-emitter'
|
||||
import { formatNumber } from '@/utils/format'
|
||||
import classNames from '@/utils/classnames'
|
||||
import cn from '@/utils/classnames'
|
||||
import Divider from '@/app/components/base/divider'
|
||||
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
|
||||
import { IndexingType } from '../../../create/step-two'
|
||||
|
||||
type ISegmentDetailProps = {
|
||||
segInfo?: Partial<SegmentDetailModel> & { id: string }
|
||||
@@ -48,6 +50,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
|
||||
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
|
||||
const mode = useDocumentContext(s => s.mode)
|
||||
const parentMode = useDocumentContext(s => s.parentMode)
|
||||
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
|
||||
|
||||
eventEmitter?.useSubscription((v) => {
|
||||
if (v === 'update-segment')
|
||||
@@ -56,56 +59,41 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
|
||||
setLoading(false)
|
||||
})
|
||||
|
||||
const handleCancel = () => {
|
||||
const handleCancel = useCallback(() => {
|
||||
onCancel()
|
||||
}
|
||||
}, [onCancel])
|
||||
|
||||
const handleSave = () => {
|
||||
const handleSave = useCallback(() => {
|
||||
onUpdate(segInfo?.id || '', question, answer, keywords)
|
||||
}
|
||||
}, [onUpdate, segInfo?.id, question, answer, keywords])
|
||||
|
||||
const handleRegeneration = () => {
|
||||
const handleRegeneration = useCallback(() => {
|
||||
setShowRegenerationModal(true)
|
||||
}
|
||||
}, [])
|
||||
|
||||
const onCancelRegeneration = () => {
|
||||
const onCancelRegeneration = useCallback(() => {
|
||||
setShowRegenerationModal(false)
|
||||
}
|
||||
}, [])
|
||||
|
||||
const onConfirmRegeneration = () => {
|
||||
const onConfirmRegeneration = useCallback(() => {
|
||||
onUpdate(segInfo?.id || '', question, answer, keywords, true)
|
||||
}
|
||||
|
||||
const isParentChildMode = useMemo(() => {
|
||||
return mode === 'hierarchical'
|
||||
}, [mode])
|
||||
|
||||
const isFullDocMode = useMemo(() => {
|
||||
return mode === 'hierarchical' && parentMode === 'full-doc'
|
||||
}, [mode, parentMode])
|
||||
|
||||
const titleText = useMemo(() => {
|
||||
return isEditMode ? t('datasetDocuments.segment.editChunk') : t('datasetDocuments.segment.chunkDetail')
|
||||
}, [isEditMode, t])
|
||||
|
||||
const isQAModel = useMemo(() => {
|
||||
return docForm === ChunkingMode.qa
|
||||
}, [docForm])
|
||||
}, [onUpdate, segInfo?.id, question, answer, keywords])
|
||||
|
||||
const wordCountText = useMemo(() => {
|
||||
const contentLength = isQAModel ? (question.length + answer.length) : question.length
|
||||
const contentLength = docForm === ChunkingMode.qa ? (question.length + answer.length) : question.length
|
||||
const total = formatNumber(isEditMode ? contentLength : segInfo!.word_count as number)
|
||||
const count = isEditMode ? contentLength : segInfo!.word_count as number
|
||||
return `${total} ${t('datasetDocuments.segment.characters', { count })}`
|
||||
}, [isEditMode, question.length, answer.length, isQAModel, segInfo, t])
|
||||
}, [isEditMode, question.length, answer.length, docForm, segInfo, t])
|
||||
|
||||
const labelPrefix = useMemo(() => {
|
||||
return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
|
||||
}, [isParentChildMode, t])
|
||||
const isFullDocMode = mode === 'hierarchical' && parentMode === 'full-doc'
|
||||
const titleText = isEditMode ? t('datasetDocuments.segment.editChunk') : t('datasetDocuments.segment.chunkDetail')
|
||||
const labelPrefix = mode === 'hierarchical' ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
|
||||
const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL
|
||||
|
||||
return (
|
||||
<div className={'flex h-full flex-col'}>
|
||||
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
|
||||
<div className={cn('flex items-center justify-between', fullScreen ? 'border border-divider-subtle py-3 pl-6 pr-4' : 'pl-4 pr-3 pt-3')}>
|
||||
<div className='flex flex-col'>
|
||||
<div className='system-xl-semibold text-text-primary'>{titleText}</div>
|
||||
<div className='flex items-center gap-x-2'>
|
||||
@@ -134,12 +122,12 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className={classNames(
|
||||
<div className={cn(
|
||||
'flex grow',
|
||||
fullScreen ? 'w-full flex-row justify-center px-6 pt-6 gap-x-8' : 'flex-col gap-y-1 py-3 px-4',
|
||||
!isEditMode && 'pb-0 overflow-hidden',
|
||||
fullScreen ? 'w-full flex-row justify-center gap-x-8 px-6 pt-6' : 'flex-col gap-y-1 px-4 py-3',
|
||||
!isEditMode && 'overflow-hidden pb-0',
|
||||
)}>
|
||||
<div className={classNames(isEditMode ? 'break-all whitespace-pre-line overflow-hidden' : 'overflow-y-auto', fullScreen ? 'w-1/2' : 'grow')}>
|
||||
<div className={cn(isEditMode ? 'overflow-hidden whitespace-pre-line break-all' : 'overflow-y-auto', fullScreen ? 'w-1/2' : 'grow')}>
|
||||
<ChunkContent
|
||||
docForm={docForm}
|
||||
question={question}
|
||||
@@ -149,7 +137,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
|
||||
isEditMode={isEditMode}
|
||||
/>
|
||||
</div>
|
||||
{mode === 'custom' && <Keywords
|
||||
{isECOIndexing && <Keywords
|
||||
className={fullScreen ? 'w-1/5' : ''}
|
||||
actionType={isEditMode ? 'edit' : 'view'}
|
||||
segInfo={segInfo}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
import { memo, useMemo, useRef, useState } from 'react'
|
||||
import { memo, useCallback, useMemo, useRef, useState } from 'react'
|
||||
import type { FC } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useContext } from 'use-context-selector'
|
||||
@@ -12,7 +12,6 @@ import Keywords from './completed/common/keywords'
|
||||
import ChunkContent from './completed/common/chunk-content'
|
||||
import AddAnother from './completed/common/add-another'
|
||||
import Dot from './completed/common/dot'
|
||||
import { useDocumentContext } from './index'
|
||||
import { useStore as useAppStore } from '@/app/components/app/store'
|
||||
import { ToastContext } from '@/app/components/base/toast'
|
||||
import { ChunkingMode, type SegmentUpdater } from '@/models/datasets'
|
||||
@@ -20,6 +19,8 @@ import classNames from '@/utils/classnames'
|
||||
import { formatNumber } from '@/utils/format'
|
||||
import Divider from '@/app/components/base/divider'
|
||||
import { useAddSegment } from '@/service/knowledge/use-segment'
|
||||
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
|
||||
import { IndexingType } from '../../create/step-two'
|
||||
|
||||
type NewSegmentModalProps = {
|
||||
onCancel: () => void
|
||||
@@ -44,39 +45,37 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
|
||||
const [addAnother, setAddAnother] = useState(true)
|
||||
const fullScreen = useSegmentListContext(s => s.fullScreen)
|
||||
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
|
||||
const mode = useDocumentContext(s => s.mode)
|
||||
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
|
||||
const { appSidebarExpand } = useAppStore(useShallow(state => ({
|
||||
appSidebarExpand: state.appSidebarExpand,
|
||||
})))
|
||||
const refreshTimer = useRef<any>(null)
|
||||
|
||||
const CustomButton = <>
|
||||
<Divider type='vertical' className='mx-1 h-3 bg-divider-regular' />
|
||||
<button
|
||||
type='button'
|
||||
className='system-xs-semibold text-text-accent'
|
||||
onClick={() => {
|
||||
clearTimeout(refreshTimer.current)
|
||||
viewNewlyAddedChunk()
|
||||
}}>
|
||||
{t('common.operation.view')}
|
||||
</button>
|
||||
</>
|
||||
const CustomButton = useMemo(() => (
|
||||
<>
|
||||
<Divider type='vertical' className='mx-1 h-3 bg-divider-regular' />
|
||||
<button
|
||||
type='button'
|
||||
className='system-xs-semibold text-text-accent'
|
||||
onClick={() => {
|
||||
clearTimeout(refreshTimer.current)
|
||||
viewNewlyAddedChunk()
|
||||
}}>
|
||||
{t('common.operation.view')}
|
||||
</button>
|
||||
</>
|
||||
), [viewNewlyAddedChunk, t])
|
||||
|
||||
const isQAModel = useMemo(() => {
|
||||
return docForm === ChunkingMode.qa
|
||||
}, [docForm])
|
||||
|
||||
const handleCancel = (actionType: 'esc' | 'add' = 'esc') => {
|
||||
const handleCancel = useCallback((actionType: 'esc' | 'add' = 'esc') => {
|
||||
if (actionType === 'esc' || !addAnother)
|
||||
onCancel()
|
||||
}
|
||||
}, [onCancel, addAnother])
|
||||
|
||||
const { mutateAsync: addSegment } = useAddSegment()
|
||||
|
||||
const handleSave = async () => {
|
||||
const handleSave = useCallback(async () => {
|
||||
const params: SegmentUpdater = { content: '' }
|
||||
if (isQAModel) {
|
||||
if (docForm === ChunkingMode.qa) {
|
||||
if (!question.trim()) {
|
||||
return notify({
|
||||
type: 'error',
|
||||
@@ -129,21 +128,27 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
|
||||
setLoading(false)
|
||||
},
|
||||
})
|
||||
}
|
||||
}, [docForm, keywords, addSegment, datasetId, documentId, question, answer, notify, t, appSidebarExpand, CustomButton, handleCancel, onSave])
|
||||
|
||||
const wordCountText = useMemo(() => {
|
||||
const count = isQAModel ? (question.length + answer.length) : question.length
|
||||
const count = docForm === ChunkingMode.qa ? (question.length + answer.length) : question.length
|
||||
return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}`
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [question.length, answer.length, isQAModel])
|
||||
}, [question.length, answer.length, docForm, t])
|
||||
|
||||
const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL
|
||||
|
||||
return (
|
||||
<div className={'flex h-full flex-col'}>
|
||||
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
|
||||
<div
|
||||
className={classNames(
|
||||
'flex items-center justify-between',
|
||||
fullScreen ? 'border border-divider-subtle py-3 pl-6 pr-4' : 'pl-4 pr-3 pt-3',
|
||||
)}
|
||||
>
|
||||
<div className='flex flex-col'>
|
||||
<div className='system-xl-semibold text-text-primary'>{
|
||||
t('datasetDocuments.segment.addChunk')
|
||||
}</div>
|
||||
<div className='system-xl-semibold text-text-primary'>
|
||||
{t('datasetDocuments.segment.addChunk')}
|
||||
</div>
|
||||
<div className='flex items-center gap-x-2'>
|
||||
<SegmentIndexTag label={t('datasetDocuments.segment.newChunk')!} />
|
||||
<Dot />
|
||||
@@ -171,8 +176,8 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className={classNames('flex grow', fullScreen ? 'w-full flex-row justify-center px-6 pt-6 gap-x-8' : 'flex-col gap-y-1 py-3 px-4')}>
|
||||
<div className={classNames('break-all overflow-hidden whitespace-pre-line', fullScreen ? 'w-1/2' : 'grow')}>
|
||||
<div className={classNames('flex grow', fullScreen ? 'w-full flex-row justify-center gap-x-8 px-6 pt-6' : 'flex-col gap-y-1 px-4 py-3')}>
|
||||
<div className={classNames('overflow-hidden whitespace-pre-line break-all', fullScreen ? 'w-1/2' : 'grow')}>
|
||||
<ChunkContent
|
||||
docForm={docForm}
|
||||
question={question}
|
||||
@@ -182,7 +187,7 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
|
||||
isEditMode={true}
|
||||
/>
|
||||
</div>
|
||||
{mode === 'custom' && <Keywords
|
||||
{isECOIndexing && <Keywords
|
||||
className={fullScreen ? 'w-1/5' : ''}
|
||||
actionType='add'
|
||||
keywords={keywords}
|
||||
|
@@ -213,7 +213,7 @@ export default combine(
|
||||
settings: {
|
||||
tailwindcss: {
|
||||
// These are the default values but feel free to customize
|
||||
callees: ['classnames', 'clsx', 'ctl', 'cn'],
|
||||
callees: ['classnames', 'clsx', 'ctl', 'cn', 'classNames'],
|
||||
config: 'tailwind.config.js', // returned from `loadConfig()` utility if not provided
|
||||
cssFiles: [
|
||||
'**/*.css',
|
||||
|
Reference in New Issue
Block a user