Feat: Support re-segmentation (#114)
Co-authored-by: John Wang <takatost@gmail.com> Co-authored-by: Jyong <718720800@qq.com> Co-authored-by: 金伟强 <iamjoel007@gmail.com>
This commit is contained in:
@@ -9,7 +9,7 @@ import {
|
||||
createDocument,
|
||||
fetchFileIndexingEstimate as didFetchFileIndexingEstimate,
|
||||
} from '@/service/datasets'
|
||||
import type { CreateDocumentReq, createDocumentResponse } from '@/models/datasets'
|
||||
import type { CreateDocumentReq, createDocumentResponse, FullDocumentDetail } from '@/models/datasets'
|
||||
import Button from '@/app/components/base/button'
|
||||
import PreviewItem from './preview-item'
|
||||
import Loading from '@/app/components/base/loading'
|
||||
@@ -22,14 +22,18 @@ import Toast from '@/app/components/base/toast'
|
||||
import { formatNumber } from '@/utils/format'
|
||||
|
||||
type StepTwoProps = {
|
||||
isSetting?: boolean,
|
||||
documentDetail?: FullDocumentDetail
|
||||
hasSetAPIKEY: boolean,
|
||||
onSetting: () => void,
|
||||
datasetId?: string,
|
||||
indexingType?: string,
|
||||
file?: File,
|
||||
onStepChange: (delta: number) => void,
|
||||
updateIndexingTypeCache: (type: string) => void,
|
||||
updateResultCache: (res: createDocumentResponse) => void
|
||||
onStepChange?: (delta: number) => void,
|
||||
updateIndexingTypeCache?: (type: string) => void,
|
||||
updateResultCache?: (res: createDocumentResponse) => void
|
||||
onSave?: () => void
|
||||
onCancel?: () => void
|
||||
}
|
||||
|
||||
enum SegmentType {
|
||||
@@ -42,6 +46,8 @@ enum IndexingType {
|
||||
}
|
||||
|
||||
const StepTwo = ({
|
||||
isSetting,
|
||||
documentDetail,
|
||||
hasSetAPIKEY,
|
||||
onSetting,
|
||||
datasetId,
|
||||
@@ -50,6 +56,8 @@ const StepTwo = ({
|
||||
onStepChange,
|
||||
updateIndexingTypeCache,
|
||||
updateResultCache,
|
||||
onSave,
|
||||
onCancel,
|
||||
}: StepTwoProps) => {
|
||||
const { t } = useTranslation()
|
||||
const scrollRef = useRef<HTMLDivElement>(null)
|
||||
@@ -171,15 +179,23 @@ const StepTwo = ({
|
||||
}
|
||||
|
||||
const getCreationParams = () => {
|
||||
const params = {
|
||||
data_source: {
|
||||
type: 'upload_file',
|
||||
info: file?.id,
|
||||
name: file?.name,
|
||||
},
|
||||
indexing_technique: getIndexing_technique(),
|
||||
process_rule: getProcessRule(),
|
||||
} as CreateDocumentReq
|
||||
let params
|
||||
if (isSetting) {
|
||||
params = {
|
||||
original_document_id: documentDetail?.id,
|
||||
process_rule: getProcessRule(),
|
||||
} as CreateDocumentReq
|
||||
} else {
|
||||
params = {
|
||||
data_source: {
|
||||
type: 'upload_file',
|
||||
info: file?.id,
|
||||
name: file?.name,
|
||||
},
|
||||
indexing_technique: getIndexing_technique(),
|
||||
process_rule: getProcessRule(),
|
||||
} as CreateDocumentReq
|
||||
}
|
||||
return params
|
||||
}
|
||||
|
||||
@@ -196,6 +212,25 @@ const StepTwo = ({
|
||||
console.log(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Seeds the segmentation form state from the process rule stored on
// `documentDetail`. Does nothing when `documentDetail` is not provided.
// NOTE(review): `dataset_process_rule.rules.segmentation` is dereferenced
// without guards — confirm the document detail always carries these fields.
const getRulesFromDetail = () => {
|
||||
if (documentDetail) {
|
||||
const rules = documentDetail.dataset_process_rule.rules
|
||||
const separator = rules.segmentation.separator
|
||||
const max = rules.segmentation.max_tokens
|
||||
// A real newline separator is stored as the two-character text `\n`;
// an empty or missing separator falls back to that same text.
setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n')
|
||||
setMax(max)
|
||||
setRules(rules.pre_processing_rules)
|
||||
// The full rule object is also kept as the default configuration.
setDefaultConfig(rules)
|
||||
}
|
||||
}
|
||||
|
||||
// Sets the segmentation type to the `mode` recorded on the document's
// process rule. Does nothing when `documentDetail` is not provided.
const getDefaultMode = () => {
|
||||
if (documentDetail) {
|
||||
setSegmentationType(documentDetail.dataset_process_rule.mode)
|
||||
}
|
||||
}
|
||||
|
||||
const createHandle = async () => {
|
||||
try {
|
||||
let res;
|
||||
@@ -204,19 +239,20 @@ const StepTwo = ({
|
||||
res = await createFirstDocument({
|
||||
body: params
|
||||
})
|
||||
updateIndexingTypeCache(indexType)
|
||||
updateResultCache(res)
|
||||
updateIndexingTypeCache && updateIndexingTypeCache(indexType)
|
||||
updateResultCache && updateResultCache(res)
|
||||
} else {
|
||||
res = await createDocument({
|
||||
datasetId,
|
||||
body: params
|
||||
})
|
||||
updateIndexingTypeCache(indexType)
|
||||
updateResultCache({
|
||||
updateIndexingTypeCache && updateIndexingTypeCache(indexType)
|
||||
updateResultCache && updateResultCache({
|
||||
document: res,
|
||||
})
|
||||
}
|
||||
onStepChange(+1)
|
||||
onStepChange && onStepChange(+1)
|
||||
isSetting && onSave && onSave()
|
||||
}
|
||||
catch (err) {
|
||||
Toast.notify({
|
||||
@@ -228,7 +264,12 @@ const StepTwo = ({
|
||||
|
||||
useEffect(() => {
|
||||
// fetch rules
|
||||
getRules()
|
||||
if (!isSetting) {
|
||||
getRules()
|
||||
} else {
|
||||
getRulesFromDetail()
|
||||
getDefaultMode()
|
||||
}
|
||||
}, [])
|
||||
|
||||
useEffect(() => {
|
||||
@@ -444,11 +485,18 @@ const StepTwo = ({
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className='flex items-center mt-8 py-2'>
|
||||
<Button onClick={() => onStepChange(-1)}>{t('datasetCreation.stepTwo.lastStep')}</Button>
|
||||
<div className={s.divider} />
|
||||
<Button type='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.nextStep')}</Button>
|
||||
</div>
|
||||
{!isSetting ? (
|
||||
<div className='flex items-center mt-8 py-2'>
|
||||
<Button onClick={() => onStepChange && onStepChange(-1)}>{t('datasetCreation.stepTwo.lastStep')}</Button>
|
||||
<div className={s.divider} />
|
||||
<Button type='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.nextStep')}</Button>
|
||||
</div>
|
||||
) : (
|
||||
<div className='flex items-center mt-8 py-2'>
|
||||
<Button type='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
|
||||
<Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@@ -19,7 +19,7 @@ import type { FullDocumentDetail, ProcessRuleResponse } from '@/models/datasets'
|
||||
import type { CommonResponse } from '@/models/common'
|
||||
import { asyncRunSafe } from '@/utils'
|
||||
import { formatNumber } from '@/utils/format'
|
||||
import { fetchIndexingEstimate, fetchIndexingStatus, fetchProcessRule, pauseDocIndexing, resumeDocIndexing } from '@/service/datasets'
|
||||
import { fetchIndexingEstimate, fetchProcessRule, pauseDocIndexing, resumeDocIndexing } from '@/service/datasets'
|
||||
import DatasetDetailContext from '@/context/dataset-detail'
|
||||
import StopEmbeddingModal from '@/app/components/datasets/create/stop-embedding-modal'
|
||||
|
||||
@@ -118,14 +118,45 @@ const EmbeddingDetail: FC<Props> = ({ detail, stopPosition = 'top', datasetId: d
|
||||
const localDocumentId = docId ?? documentId
|
||||
const localIndexingTechnique = indexingType ?? indexingTechnique
|
||||
|
||||
const { data: indexingStatusDetail, error: indexingStatusErr, mutate: statusMutate } = useSWR({
|
||||
action: 'fetchIndexingStatus',
|
||||
datasetId: localDatasetId,
|
||||
documentId: localDocumentId,
|
||||
}, apiParams => fetchIndexingStatus(omit(apiParams, 'action')), {
|
||||
refreshInterval: 5000,
|
||||
revalidateOnFocus: false,
|
||||
})
|
||||
// const { data: indexingStatusDetailFromApi, error: indexingStatusErr, mutate: statusMutate } = useSWR({
|
||||
// action: 'fetchIndexingStatus',
|
||||
// datasetId: localDatasetId,
|
||||
// documentId: localDocumentId,
|
||||
// }, apiParams => fetchIndexingStatus(omit(apiParams, 'action')), {
|
||||
// refreshInterval: 2500,
|
||||
// revalidateOnFocus: false,
|
||||
// })
|
||||
|
||||
const [indexingStatusDetail, setIndexingStatusDetail, getIndexingStatusDetail] = useGetState<any>(null)
|
||||
// Requests the indexing status for the local dataset/document ids and stores
// the response in `indexingStatusDetail`.
// NOTE(review): a rejected request is not caught here — confirm whether
// failures should be reported to the user.
// NOTE(review): the import that provides `doFetchIndexingStatus` is not
// visible in this excerpt — verify it exists.
const fetchIndexingStatus = async () => {
|
||||
const status = await doFetchIndexingStatus({ datasetId: localDatasetId, documentId: localDocumentId })
|
||||
setIndexingStatusDetail(status)
|
||||
}
|
||||
|
||||
const [runId, setRunId, getRunId] = useGetState<any>(null)
|
||||
// Starts polling the indexing status every 2500 ms and records the interval
// id through `setRunId` so it can be cleared later.
// NOTE(review): only the 'completed' status ends polling; if other terminal
// statuses exist (e.g. an error state), the timer keeps firing until it is
// cleared elsewhere — confirm this is intended.
const startQueryStatus = () => {
|
||||
const runId = setInterval(() => {
|
||||
// Read the status through the getter on every tick — presumably so the
// callback does not rely on a value captured when the timer was created.
const indexingStatusDetail = getIndexingStatusDetail()
|
||||
if (indexingStatusDetail?.indexing_status === 'completed') {
|
||||
// eslint-disable-next-line @typescript-eslint/no-use-before-define
|
||||
stopQueryStatus()
|
||||
return
|
||||
}
|
||||
// Not completed yet: request a fresh status (the promise is not awaited).
fetchIndexingStatus()
|
||||
}, 2500)
|
||||
setRunId(runId)
|
||||
}
|
||||
// Stops status polling by clearing the interval id read through `getRunId()`.
const stopQueryStatus = () => {
|
||||
clearInterval(getRunId())
|
||||
}
|
||||
|
||||
// Runs once after mount (empty dependency list): fetches the status
// immediately, then starts interval polling. The returned cleanup clears the
// interval when the component unmounts.
useEffect(() => {
|
||||
fetchIndexingStatus()
|
||||
startQueryStatus()
|
||||
return () => {
|
||||
stopQueryStatus()
|
||||
}
|
||||
}, [])
|
||||
|
||||
const { data: indexingEstimateDetail, error: indexingEstimateErr } = useSWR({
|
||||
action: 'fetchIndexingEstimate',
|
||||
@@ -168,7 +199,7 @@ const EmbeddingDetail: FC<Props> = ({ detail, stopPosition = 'top', datasetId: d
|
||||
const [e] = await asyncRunSafe<CommonResponse>(opApi({ datasetId: localDatasetId, documentId: localDocumentId }) as Promise<CommonResponse>)
|
||||
if (!e) {
|
||||
notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
|
||||
statusMutate()
|
||||
setIndexingStatusDetail(null)
|
||||
}
|
||||
else {
|
||||
notify({ type: 'error', message: t('common.actionMsg.modificationFailed') })
|
||||
|
@@ -0,0 +1,90 @@
|
||||
'use client'
|
||||
import React, { useState, useCallback, useEffect } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useBoolean } from 'ahooks'
|
||||
import { useContext } from 'use-context-selector'
|
||||
import { useRouter } from 'next/navigation'
|
||||
import DatasetDetailContext from '@/context/dataset-detail'
|
||||
import type { FullDocumentDetail } from '@/models/datasets'
|
||||
import { fetchTenantInfo } from '@/service/common'
|
||||
import { fetchDocumentDetail, MetadataType } from '@/service/datasets'
|
||||
|
||||
import Loading from '@/app/components/base/loading'
|
||||
import StepTwo from '@/app/components/datasets/create/step-two'
|
||||
import AccountSetting from '@/app/components/header/account-setting'
|
||||
import AppUnavailable from '@/app/components/base/app-unavailable'
|
||||
|
||||
type DocumentSettingsProps = {
|
||||
datasetId: string;
|
||||
documentId: string;
|
||||
}
|
||||
|
||||
/**
 * Settings view for an existing document.
 *
 * Loads the document detail for `datasetId` / `documentId` and renders
 * `StepTwo` with `isSetting` enabled, passing the loaded detail and its
 * uploaded file. Renders `AppUnavailable` when the detail request fails.
 *
 * @param datasetId - id of the dataset that owns the document
 * @param documentId - id of the document whose settings are shown
 */
const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
|
||||
const { t } = useTranslation()
|
||||
const router = useRouter()
|
||||
const [hasSetAPIKEY, setHasSetAPIKEY] = useState(true)
|
||||
const [isShowSetAPIKey, { setTrue: showSetAPIKey, setFalse: hideSetAPIkey }] = useBoolean()
|
||||
const [hasError, setHasError] = useState(false)
|
||||
const { indexingTechnique, dataset } = useContext(DatasetDetailContext)
|
||||
|
||||
// After a save, navigate to this document's detail route.
const saveHandler = () => router.push(`/datasets/${datasetId}/documents/${documentId}`)
|
||||
|
||||
// Cancel returns to the previous history entry.
const cancelHandler = () => router.back()
|
||||
|
||||
// Fetches tenant info and records whether at least one provider is valid.
// NOTE(review): a rejected request is not caught here; the mount effect
// calls this without awaiting — confirm failures are handled elsewhere.
const checkAPIKey = async () => {
|
||||
const data = await fetchTenantInfo({ url: '/info' })
|
||||
const hasSetKey = data.providers.some(({ is_valid }) => is_valid)
|
||||
setHasSetAPIKEY(hasSetKey)
|
||||
}
|
||||
|
||||
// Check the API key state once after mount.
useEffect(() => {
|
||||
checkAPIKey()
|
||||
}, [])
|
||||
|
||||
const [documentDetail, setDocumentDetail] = useState<FullDocumentDetail | null>(null)
|
||||
// Loads the document detail whenever the ids change; any failure switches
// the view to the error state.
// NOTE(review): the request is not cancelled — a response that arrives after
// unmount, or for ids that have since changed, still calls the setters.
// Confirm this is acceptable.
useEffect(() => {
|
||||
(async () => {
|
||||
try {
|
||||
const detail = await fetchDocumentDetail({
|
||||
datasetId,
|
||||
documentId,
|
||||
params: { metadata: 'without' as MetadataType }
|
||||
})
|
||||
setDocumentDetail(detail)
|
||||
} catch (e) {
|
||||
setHasError(true)
|
||||
}
|
||||
})()
|
||||
}, [datasetId, documentId])
|
||||
|
||||
if (hasError) {
|
||||
return <AppUnavailable code={500} unknownReason={t('datasetCreation.error.unavailable') as string} />
|
||||
}
|
||||
|
||||
// Loading is shown until the detail arrives; StepTwo additionally requires
// `dataset` from context. The account-setting panel is rendered while
// `isShowSetAPIKey` is true and re-checks the API key before closing.
// NOTE(review): if the detail has loaded but `dataset` is still falsy,
// neither Loading nor StepTwo is rendered — confirm this is intended.
return (
|
||||
<div className='flex' style={{ height: 'calc(100vh - 56px)' }}>
|
||||
<div className="grow bg-white">
|
||||
{!documentDetail && <Loading type='app' />}
|
||||
{dataset && documentDetail && (
|
||||
<StepTwo
|
||||
hasSetAPIKEY={hasSetAPIKEY}
|
||||
onSetting={showSetAPIKey}
|
||||
datasetId={datasetId}
|
||||
indexingType={indexingTechnique || ''}
|
||||
isSetting
|
||||
documentDetail={documentDetail}
|
||||
file={documentDetail.data_source_info.upload_file}
|
||||
onSave={saveHandler}
|
||||
onCancel={cancelHandler}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
{isShowSetAPIKey && <AccountSetting activeTab="provider" onCancel={async () => {
|
||||
await checkAPIKey()
|
||||
hideSetAPIkey()
|
||||
}} />}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default DocumentSettings
|
@@ -95,6 +95,7 @@ export const OperationAction: FC<{
|
||||
const [showModal, setShowModal] = useState(false)
|
||||
const { notify } = useContext(ToastContext)
|
||||
const { t } = useTranslation()
|
||||
const router = useRouter()
|
||||
|
||||
const isListScene = scene === 'list'
|
||||
|
||||
@@ -166,15 +167,19 @@ export const OperationAction: FC<{
|
||||
</div>
|
||||
<Divider />
|
||||
</>}
|
||||
{/* <div className={s.actionItem}>
|
||||
<SettingsIcon />
|
||||
<span className={s.actionName}>{t('datasetDocuments.list.action.settings')}</span>
|
||||
</div>
|
||||
<div className={s.actionItem} onClick={() => router.push(`/datasets/${datasetId}/documents/create`)}>
|
||||
<FilePlusIcon />
|
||||
<span className={s.actionName}>{t('datasetDocuments.list.action.uploadFile')}</span>
|
||||
</div>
|
||||
<Divider className='my-1' /> */}
|
||||
{!archived && (
|
||||
<>
|
||||
<div className={s.actionItem} onClick={() => router.push(`/datasets/${datasetId}/documents/${detail.id}/settings`)}>
|
||||
<SettingsIcon />
|
||||
<span className={s.actionName}>{t('datasetDocuments.list.action.settings')}</span>
|
||||
</div>
|
||||
{/* <div className={s.actionItem} onClick={() => router.push(`/datasets/${datasetId}/documents/create`)}>
|
||||
<FilePlusIcon />
|
||||
<span className={s.actionName}>{t('datasetDocuments.list.action.uploadFile')}</span>
|
||||
</div> */}
|
||||
<Divider className='my-1' />
|
||||
</>
|
||||
)}
|
||||
{!archived && <div className={s.actionItem} onClick={() => onOperate('archive')}>
|
||||
<ArchiveIcon />
|
||||
<span className={s.actionName}>{t('datasetDocuments.list.action.archive')}</span>
|
||||
|
@@ -72,7 +72,7 @@
|
||||
.txtIcon {
|
||||
background-image: url(./assets/txt.svg);
|
||||
}
|
||||
.mdIcon {
|
||||
.markdownIcon {
|
||||
background-image: url(./assets/md.svg);
|
||||
}
|
||||
.statusItemDetail {
|
||||
|
@@ -1,15 +1,15 @@
|
||||
'use client'
|
||||
import { Dispatch, SetStateAction, useEffect, useState } from 'react'
|
||||
import { useEffect, useState } from 'react'
|
||||
import useSWR from 'swr'
|
||||
import { useContext } from 'use-context-selector'
|
||||
import { BookOpenIcon } from '@heroicons/react/24/outline'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { ToastContext } from '@/app/components/base/toast'
|
||||
import PermissionsRadio from '../permissions-radio'
|
||||
import IndexMethodRadio from '../index-method-radio'
|
||||
import { ToastContext } from '@/app/components/base/toast'
|
||||
import Button from '@/app/components/base/button'
|
||||
import { updateDatasetSetting, fetchDataDetail } from '@/service/datasets'
|
||||
import { DataSet } from '@/models/datasets'
|
||||
import { fetchDataDetail, updateDatasetSetting } from '@/service/datasets'
|
||||
import type { DataSet } from '@/models/datasets'
|
||||
|
||||
const rowClass = `
|
||||
flex justify-between py-4
|
||||
@@ -20,8 +20,7 @@ const labelClass = `
|
||||
const inputClass = `
|
||||
w-[480px] px-3 bg-gray-100 text-sm text-gray-800 rounded-lg outline-none appearance-none
|
||||
`
|
||||
|
||||
const useInitialValue = <T,>(depend: T, dispatch: Dispatch<SetStateAction<T>>) => {
|
||||
const useInitialValue = (depend: any, dispatch: any) => {
|
||||
useEffect(() => {
|
||||
dispatch(depend)
|
||||
}, [depend])
|
||||
@@ -32,7 +31,7 @@ type Props = {
|
||||
}
|
||||
|
||||
const Form = ({
|
||||
datasetId
|
||||
datasetId,
|
||||
}: Props) => {
|
||||
const { t } = useTranslation()
|
||||
const { notify } = useContext(ToastContext)
|
||||
@@ -44,7 +43,8 @@ const Form = ({
|
||||
const [indexMethod, setIndexMethod] = useState(currentDataset?.indexing_technique)
|
||||
|
||||
const handleSave = async () => {
|
||||
if (loading) return
|
||||
if (loading)
|
||||
return
|
||||
if (!name?.trim()) {
|
||||
notify({ type: 'error', message: t('datasetSettings.form.nameError') })
|
||||
return
|
||||
@@ -57,14 +57,16 @@ const Form = ({
|
||||
name,
|
||||
description,
|
||||
permission,
|
||||
indexing_technique: indexMethod
|
||||
}
|
||||
indexing_technique: indexMethod,
|
||||
},
|
||||
})
|
||||
notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
|
||||
await mutateDatasets()
|
||||
} catch (e) {
|
||||
}
|
||||
catch (e) {
|
||||
notify({ type: 'error', message: t('common.actionMsg.modificationFailed') })
|
||||
} finally {
|
||||
}
|
||||
finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
@@ -142,4 +144,4 @@ const Form = ({
|
||||
)
|
||||
}
|
||||
|
||||
export default Form
|
||||
export default Form
|
||||
|
Reference in New Issue
Block a user