@@ -534,7 +534,7 @@ class IndexingRunner:
|
|||||||
# chunk nodes by chunk size
|
# chunk nodes by chunk size
|
||||||
indexing_start_at = time.perf_counter()
|
indexing_start_at = time.perf_counter()
|
||||||
tokens = 0
|
tokens = 0
|
||||||
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
|
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
|
||||||
# create keyword index
|
# create keyword index
|
||||||
create_keyword_thread = threading.Thread(
|
create_keyword_thread = threading.Thread(
|
||||||
target=self._process_keyword_index,
|
target=self._process_keyword_index,
|
||||||
@@ -572,7 +572,7 @@ class IndexingRunner:
|
|||||||
|
|
||||||
for future in futures:
|
for future in futures:
|
||||||
tokens += future.result()
|
tokens += future.result()
|
||||||
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
|
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
|
||||||
create_keyword_thread.join()
|
create_keyword_thread.join()
|
||||||
indexing_end_at = time.perf_counter()
|
indexing_end_at = time.perf_counter()
|
||||||
|
|
||||||
|
@@ -76,6 +76,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
|
|||||||
if dataset.indexing_technique == "high_quality":
|
if dataset.indexing_technique == "high_quality":
|
||||||
vector = Vector(dataset)
|
vector = Vector(dataset)
|
||||||
vector.create(documents)
|
vector.create(documents)
|
||||||
|
with_keywords = False
|
||||||
if with_keywords:
|
if with_keywords:
|
||||||
keywords_list = kwargs.get("keywords_list")
|
keywords_list = kwargs.get("keywords_list")
|
||||||
keyword = Keyword(dataset)
|
keyword = Keyword(dataset)
|
||||||
@@ -91,6 +92,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
|
|||||||
vector.delete_by_ids(node_ids)
|
vector.delete_by_ids(node_ids)
|
||||||
else:
|
else:
|
||||||
vector.delete()
|
vector.delete()
|
||||||
|
with_keywords = False
|
||||||
if with_keywords:
|
if with_keywords:
|
||||||
keyword = Keyword(dataset)
|
keyword = Keyword(dataset)
|
||||||
if node_ids:
|
if node_ids:
|
||||||
|
@@ -97,16 +97,16 @@ class VectorService:
|
|||||||
vector = Vector(dataset=dataset)
|
vector = Vector(dataset=dataset)
|
||||||
vector.delete_by_ids([segment.index_node_id])
|
vector.delete_by_ids([segment.index_node_id])
|
||||||
vector.add_texts([document], duplicate_check=True)
|
vector.add_texts([document], duplicate_check=True)
|
||||||
|
|
||||||
# update keyword index
|
|
||||||
keyword = Keyword(dataset)
|
|
||||||
keyword.delete_by_ids([segment.index_node_id])
|
|
||||||
|
|
||||||
# save keyword index
|
|
||||||
if keywords and len(keywords) > 0:
|
|
||||||
keyword.add_texts([document], keywords_list=[keywords])
|
|
||||||
else:
|
else:
|
||||||
keyword.add_texts([document])
|
# update keyword index
|
||||||
|
keyword = Keyword(dataset)
|
||||||
|
keyword.delete_by_ids([segment.index_node_id])
|
||||||
|
|
||||||
|
# save keyword index
|
||||||
|
if keywords and len(keywords) > 0:
|
||||||
|
keyword.add_texts([document], keywords_list=[keywords])
|
||||||
|
else:
|
||||||
|
keyword.add_texts([document])
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def generate_child_chunks(
|
def generate_child_chunks(
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
import React, { type FC, useMemo, useState } from 'react'
|
import React, { type FC, useCallback, useMemo, useState } from 'react'
|
||||||
import { useTranslation } from 'react-i18next'
|
import { useTranslation } from 'react-i18next'
|
||||||
import {
|
import {
|
||||||
RiCloseLine,
|
RiCloseLine,
|
||||||
@@ -16,8 +16,10 @@ import { useSegmentListContext } from './index'
|
|||||||
import { ChunkingMode, type SegmentDetailModel } from '@/models/datasets'
|
import { ChunkingMode, type SegmentDetailModel } from '@/models/datasets'
|
||||||
import { useEventEmitterContextContext } from '@/context/event-emitter'
|
import { useEventEmitterContextContext } from '@/context/event-emitter'
|
||||||
import { formatNumber } from '@/utils/format'
|
import { formatNumber } from '@/utils/format'
|
||||||
import classNames from '@/utils/classnames'
|
import cn from '@/utils/classnames'
|
||||||
import Divider from '@/app/components/base/divider'
|
import Divider from '@/app/components/base/divider'
|
||||||
|
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
|
||||||
|
import { IndexingType } from '../../../create/step-two'
|
||||||
|
|
||||||
type ISegmentDetailProps = {
|
type ISegmentDetailProps = {
|
||||||
segInfo?: Partial<SegmentDetailModel> & { id: string }
|
segInfo?: Partial<SegmentDetailModel> & { id: string }
|
||||||
@@ -48,6 +50,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
|
|||||||
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
|
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
|
||||||
const mode = useDocumentContext(s => s.mode)
|
const mode = useDocumentContext(s => s.mode)
|
||||||
const parentMode = useDocumentContext(s => s.parentMode)
|
const parentMode = useDocumentContext(s => s.parentMode)
|
||||||
|
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
|
||||||
|
|
||||||
eventEmitter?.useSubscription((v) => {
|
eventEmitter?.useSubscription((v) => {
|
||||||
if (v === 'update-segment')
|
if (v === 'update-segment')
|
||||||
@@ -56,56 +59,41 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
|
|||||||
setLoading(false)
|
setLoading(false)
|
||||||
})
|
})
|
||||||
|
|
||||||
const handleCancel = () => {
|
const handleCancel = useCallback(() => {
|
||||||
onCancel()
|
onCancel()
|
||||||
}
|
}, [onCancel])
|
||||||
|
|
||||||
const handleSave = () => {
|
const handleSave = useCallback(() => {
|
||||||
onUpdate(segInfo?.id || '', question, answer, keywords)
|
onUpdate(segInfo?.id || '', question, answer, keywords)
|
||||||
}
|
}, [onUpdate, segInfo?.id, question, answer, keywords])
|
||||||
|
|
||||||
const handleRegeneration = () => {
|
const handleRegeneration = useCallback(() => {
|
||||||
setShowRegenerationModal(true)
|
setShowRegenerationModal(true)
|
||||||
}
|
}, [])
|
||||||
|
|
||||||
const onCancelRegeneration = () => {
|
const onCancelRegeneration = useCallback(() => {
|
||||||
setShowRegenerationModal(false)
|
setShowRegenerationModal(false)
|
||||||
}
|
}, [])
|
||||||
|
|
||||||
const onConfirmRegeneration = () => {
|
const onConfirmRegeneration = useCallback(() => {
|
||||||
onUpdate(segInfo?.id || '', question, answer, keywords, true)
|
onUpdate(segInfo?.id || '', question, answer, keywords, true)
|
||||||
}
|
}, [onUpdate, segInfo?.id, question, answer, keywords])
|
||||||
|
|
||||||
const isParentChildMode = useMemo(() => {
|
|
||||||
return mode === 'hierarchical'
|
|
||||||
}, [mode])
|
|
||||||
|
|
||||||
const isFullDocMode = useMemo(() => {
|
|
||||||
return mode === 'hierarchical' && parentMode === 'full-doc'
|
|
||||||
}, [mode, parentMode])
|
|
||||||
|
|
||||||
const titleText = useMemo(() => {
|
|
||||||
return isEditMode ? t('datasetDocuments.segment.editChunk') : t('datasetDocuments.segment.chunkDetail')
|
|
||||||
}, [isEditMode, t])
|
|
||||||
|
|
||||||
const isQAModel = useMemo(() => {
|
|
||||||
return docForm === ChunkingMode.qa
|
|
||||||
}, [docForm])
|
|
||||||
|
|
||||||
const wordCountText = useMemo(() => {
|
const wordCountText = useMemo(() => {
|
||||||
const contentLength = isQAModel ? (question.length + answer.length) : question.length
|
const contentLength = docForm === ChunkingMode.qa ? (question.length + answer.length) : question.length
|
||||||
const total = formatNumber(isEditMode ? contentLength : segInfo!.word_count as number)
|
const total = formatNumber(isEditMode ? contentLength : segInfo!.word_count as number)
|
||||||
const count = isEditMode ? contentLength : segInfo!.word_count as number
|
const count = isEditMode ? contentLength : segInfo!.word_count as number
|
||||||
return `${total} ${t('datasetDocuments.segment.characters', { count })}`
|
return `${total} ${t('datasetDocuments.segment.characters', { count })}`
|
||||||
}, [isEditMode, question.length, answer.length, isQAModel, segInfo, t])
|
}, [isEditMode, question.length, answer.length, docForm, segInfo, t])
|
||||||
|
|
||||||
const labelPrefix = useMemo(() => {
|
const isFullDocMode = mode === 'hierarchical' && parentMode === 'full-doc'
|
||||||
return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
|
const titleText = isEditMode ? t('datasetDocuments.segment.editChunk') : t('datasetDocuments.segment.chunkDetail')
|
||||||
}, [isParentChildMode, t])
|
const labelPrefix = mode === 'hierarchical' ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
|
||||||
|
const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className={'flex h-full flex-col'}>
|
<div className={'flex h-full flex-col'}>
|
||||||
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
|
<div className={cn('flex items-center justify-between', fullScreen ? 'border border-divider-subtle py-3 pl-6 pr-4' : 'pl-4 pr-3 pt-3')}>
|
||||||
<div className='flex flex-col'>
|
<div className='flex flex-col'>
|
||||||
<div className='system-xl-semibold text-text-primary'>{titleText}</div>
|
<div className='system-xl-semibold text-text-primary'>{titleText}</div>
|
||||||
<div className='flex items-center gap-x-2'>
|
<div className='flex items-center gap-x-2'>
|
||||||
@@ -134,12 +122,12 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className={classNames(
|
<div className={cn(
|
||||||
'flex grow',
|
'flex grow',
|
||||||
fullScreen ? 'w-full flex-row justify-center px-6 pt-6 gap-x-8' : 'flex-col gap-y-1 py-3 px-4',
|
fullScreen ? 'w-full flex-row justify-center gap-x-8 px-6 pt-6' : 'flex-col gap-y-1 px-4 py-3',
|
||||||
!isEditMode && 'pb-0 overflow-hidden',
|
!isEditMode && 'overflow-hidden pb-0',
|
||||||
)}>
|
)}>
|
||||||
<div className={classNames(isEditMode ? 'break-all whitespace-pre-line overflow-hidden' : 'overflow-y-auto', fullScreen ? 'w-1/2' : 'grow')}>
|
<div className={cn(isEditMode ? 'overflow-hidden whitespace-pre-line break-all' : 'overflow-y-auto', fullScreen ? 'w-1/2' : 'grow')}>
|
||||||
<ChunkContent
|
<ChunkContent
|
||||||
docForm={docForm}
|
docForm={docForm}
|
||||||
question={question}
|
question={question}
|
||||||
@@ -149,7 +137,7 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
|
|||||||
isEditMode={isEditMode}
|
isEditMode={isEditMode}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
{mode === 'custom' && <Keywords
|
{isECOIndexing && <Keywords
|
||||||
className={fullScreen ? 'w-1/5' : ''}
|
className={fullScreen ? 'w-1/5' : ''}
|
||||||
actionType={isEditMode ? 'edit' : 'view'}
|
actionType={isEditMode ? 'edit' : 'view'}
|
||||||
segInfo={segInfo}
|
segInfo={segInfo}
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
import { memo, useMemo, useRef, useState } from 'react'
|
import { memo, useCallback, useMemo, useRef, useState } from 'react'
|
||||||
import type { FC } from 'react'
|
import type { FC } from 'react'
|
||||||
import { useTranslation } from 'react-i18next'
|
import { useTranslation } from 'react-i18next'
|
||||||
import { useContext } from 'use-context-selector'
|
import { useContext } from 'use-context-selector'
|
||||||
@@ -12,7 +12,6 @@ import Keywords from './completed/common/keywords'
|
|||||||
import ChunkContent from './completed/common/chunk-content'
|
import ChunkContent from './completed/common/chunk-content'
|
||||||
import AddAnother from './completed/common/add-another'
|
import AddAnother from './completed/common/add-another'
|
||||||
import Dot from './completed/common/dot'
|
import Dot from './completed/common/dot'
|
||||||
import { useDocumentContext } from './index'
|
|
||||||
import { useStore as useAppStore } from '@/app/components/app/store'
|
import { useStore as useAppStore } from '@/app/components/app/store'
|
||||||
import { ToastContext } from '@/app/components/base/toast'
|
import { ToastContext } from '@/app/components/base/toast'
|
||||||
import { ChunkingMode, type SegmentUpdater } from '@/models/datasets'
|
import { ChunkingMode, type SegmentUpdater } from '@/models/datasets'
|
||||||
@@ -20,6 +19,8 @@ import classNames from '@/utils/classnames'
|
|||||||
import { formatNumber } from '@/utils/format'
|
import { formatNumber } from '@/utils/format'
|
||||||
import Divider from '@/app/components/base/divider'
|
import Divider from '@/app/components/base/divider'
|
||||||
import { useAddSegment } from '@/service/knowledge/use-segment'
|
import { useAddSegment } from '@/service/knowledge/use-segment'
|
||||||
|
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
|
||||||
|
import { IndexingType } from '../../create/step-two'
|
||||||
|
|
||||||
type NewSegmentModalProps = {
|
type NewSegmentModalProps = {
|
||||||
onCancel: () => void
|
onCancel: () => void
|
||||||
@@ -44,39 +45,37 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
|
|||||||
const [addAnother, setAddAnother] = useState(true)
|
const [addAnother, setAddAnother] = useState(true)
|
||||||
const fullScreen = useSegmentListContext(s => s.fullScreen)
|
const fullScreen = useSegmentListContext(s => s.fullScreen)
|
||||||
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
|
const toggleFullScreen = useSegmentListContext(s => s.toggleFullScreen)
|
||||||
const mode = useDocumentContext(s => s.mode)
|
const indexingTechnique = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
|
||||||
const { appSidebarExpand } = useAppStore(useShallow(state => ({
|
const { appSidebarExpand } = useAppStore(useShallow(state => ({
|
||||||
appSidebarExpand: state.appSidebarExpand,
|
appSidebarExpand: state.appSidebarExpand,
|
||||||
})))
|
})))
|
||||||
const refreshTimer = useRef<any>(null)
|
const refreshTimer = useRef<any>(null)
|
||||||
|
|
||||||
const CustomButton = <>
|
const CustomButton = useMemo(() => (
|
||||||
<Divider type='vertical' className='mx-1 h-3 bg-divider-regular' />
|
<>
|
||||||
<button
|
<Divider type='vertical' className='mx-1 h-3 bg-divider-regular' />
|
||||||
type='button'
|
<button
|
||||||
className='system-xs-semibold text-text-accent'
|
type='button'
|
||||||
onClick={() => {
|
className='system-xs-semibold text-text-accent'
|
||||||
clearTimeout(refreshTimer.current)
|
onClick={() => {
|
||||||
viewNewlyAddedChunk()
|
clearTimeout(refreshTimer.current)
|
||||||
}}>
|
viewNewlyAddedChunk()
|
||||||
{t('common.operation.view')}
|
}}>
|
||||||
</button>
|
{t('common.operation.view')}
|
||||||
</>
|
</button>
|
||||||
|
</>
|
||||||
|
), [viewNewlyAddedChunk, t])
|
||||||
|
|
||||||
const isQAModel = useMemo(() => {
|
const handleCancel = useCallback((actionType: 'esc' | 'add' = 'esc') => {
|
||||||
return docForm === ChunkingMode.qa
|
|
||||||
}, [docForm])
|
|
||||||
|
|
||||||
const handleCancel = (actionType: 'esc' | 'add' = 'esc') => {
|
|
||||||
if (actionType === 'esc' || !addAnother)
|
if (actionType === 'esc' || !addAnother)
|
||||||
onCancel()
|
onCancel()
|
||||||
}
|
}, [onCancel, addAnother])
|
||||||
|
|
||||||
const { mutateAsync: addSegment } = useAddSegment()
|
const { mutateAsync: addSegment } = useAddSegment()
|
||||||
|
|
||||||
const handleSave = async () => {
|
const handleSave = useCallback(async () => {
|
||||||
const params: SegmentUpdater = { content: '' }
|
const params: SegmentUpdater = { content: '' }
|
||||||
if (isQAModel) {
|
if (docForm === ChunkingMode.qa) {
|
||||||
if (!question.trim()) {
|
if (!question.trim()) {
|
||||||
return notify({
|
return notify({
|
||||||
type: 'error',
|
type: 'error',
|
||||||
@@ -129,21 +128,27 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
|
|||||||
setLoading(false)
|
setLoading(false)
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}, [docForm, keywords, addSegment, datasetId, documentId, question, answer, notify, t, appSidebarExpand, CustomButton, handleCancel, onSave])
|
||||||
|
|
||||||
const wordCountText = useMemo(() => {
|
const wordCountText = useMemo(() => {
|
||||||
const count = isQAModel ? (question.length + answer.length) : question.length
|
const count = docForm === ChunkingMode.qa ? (question.length + answer.length) : question.length
|
||||||
return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}`
|
return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}`
|
||||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
}, [question.length, answer.length, docForm, t])
|
||||||
}, [question.length, answer.length, isQAModel])
|
|
||||||
|
const isECOIndexing = indexingTechnique === IndexingType.ECONOMICAL
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className={'flex h-full flex-col'}>
|
<div className={'flex h-full flex-col'}>
|
||||||
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
|
<div
|
||||||
|
className={classNames(
|
||||||
|
'flex items-center justify-between',
|
||||||
|
fullScreen ? 'border border-divider-subtle py-3 pl-6 pr-4' : 'pl-4 pr-3 pt-3',
|
||||||
|
)}
|
||||||
|
>
|
||||||
<div className='flex flex-col'>
|
<div className='flex flex-col'>
|
||||||
<div className='system-xl-semibold text-text-primary'>{
|
<div className='system-xl-semibold text-text-primary'>
|
||||||
t('datasetDocuments.segment.addChunk')
|
{t('datasetDocuments.segment.addChunk')}
|
||||||
}</div>
|
</div>
|
||||||
<div className='flex items-center gap-x-2'>
|
<div className='flex items-center gap-x-2'>
|
||||||
<SegmentIndexTag label={t('datasetDocuments.segment.newChunk')!} />
|
<SegmentIndexTag label={t('datasetDocuments.segment.newChunk')!} />
|
||||||
<Dot />
|
<Dot />
|
||||||
@@ -171,8 +176,8 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className={classNames('flex grow', fullScreen ? 'w-full flex-row justify-center px-6 pt-6 gap-x-8' : 'flex-col gap-y-1 py-3 px-4')}>
|
<div className={classNames('flex grow', fullScreen ? 'w-full flex-row justify-center gap-x-8 px-6 pt-6' : 'flex-col gap-y-1 px-4 py-3')}>
|
||||||
<div className={classNames('break-all overflow-hidden whitespace-pre-line', fullScreen ? 'w-1/2' : 'grow')}>
|
<div className={classNames('overflow-hidden whitespace-pre-line break-all', fullScreen ? 'w-1/2' : 'grow')}>
|
||||||
<ChunkContent
|
<ChunkContent
|
||||||
docForm={docForm}
|
docForm={docForm}
|
||||||
question={question}
|
question={question}
|
||||||
@@ -182,7 +187,7 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
|
|||||||
isEditMode={true}
|
isEditMode={true}
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
{mode === 'custom' && <Keywords
|
{isECOIndexing && <Keywords
|
||||||
className={fullScreen ? 'w-1/5' : ''}
|
className={fullScreen ? 'w-1/5' : ''}
|
||||||
actionType='add'
|
actionType='add'
|
||||||
keywords={keywords}
|
keywords={keywords}
|
||||||
|
@@ -213,7 +213,7 @@ export default combine(
|
|||||||
settings: {
|
settings: {
|
||||||
tailwindcss: {
|
tailwindcss: {
|
||||||
// These are the default values but feel free to customize
|
// These are the default values but feel free to customize
|
||||||
callees: ['classnames', 'clsx', 'ctl', 'cn'],
|
callees: ['classnames', 'clsx', 'ctl', 'cn', 'classNames'],
|
||||||
config: 'tailwind.config.js', // returned from `loadConfig()` utility if not provided
|
config: 'tailwind.config.js', // returned from `loadConfig()` utility if not provided
|
||||||
cssFiles: [
|
cssFiles: [
|
||||||
'**/*.css',
|
'**/*.css',
|
||||||
|
Reference in New Issue
Block a user