Feat: Q&A format segmentation support (#668)
Co-authored-by: jyong <718720800@qq.com> Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
This commit is contained in:
@@ -291,7 +291,7 @@
|
||||
}
|
||||
|
||||
.source {
|
||||
@apply flex justify-between items-center mt-8 px-6 py-4 rounded-xl bg-gray-50;
|
||||
@apply flex justify-between items-center mt-8 px-6 py-4 rounded-xl bg-gray-50 border border-gray-100;
|
||||
}
|
||||
|
||||
.source .divider {
|
||||
|
@@ -7,7 +7,7 @@ import { XMarkIcon } from '@heroicons/react/20/solid'
|
||||
import cn from 'classnames'
|
||||
import Link from 'next/link'
|
||||
import { groupBy } from 'lodash-es'
|
||||
import PreviewItem from './preview-item'
|
||||
import PreviewItem, { PreviewType } from './preview-item'
|
||||
import s from './index.module.css'
|
||||
import type { CreateDocumentReq, File, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
|
||||
import {
|
||||
@@ -24,6 +24,8 @@ import { formatNumber } from '@/utils/format'
|
||||
import type { DataSourceNotionPage } from '@/models/common'
|
||||
import { DataSourceType } from '@/models/datasets'
|
||||
import NotionIcon from '@/app/components/base/notion-icon'
|
||||
import Switch from '@/app/components/base/switch'
|
||||
import { MessageChatSquare } from '@/app/components/base/icons/src/public/common'
|
||||
import { useDatasetDetailContext } from '@/context/dataset-detail'
|
||||
|
||||
type Page = DataSourceNotionPage & { workspace_id: string }
|
||||
@@ -53,6 +55,10 @@ enum IndexingType {
|
||||
QUALIFIED = 'high_quality',
|
||||
ECONOMICAL = 'economy',
|
||||
}
|
||||
enum DocForm {
|
||||
TEXT = 'text_model',
|
||||
QA = 'qa_model',
|
||||
}
|
||||
|
||||
const StepTwo = ({
|
||||
isSetting,
|
||||
@@ -88,6 +94,10 @@ const StepTwo = ({
|
||||
? IndexingType.QUALIFIED
|
||||
: IndexingType.ECONOMICAL,
|
||||
)
|
||||
const [docForm, setDocForm] = useState<DocForm | string>(
|
||||
datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT,
|
||||
)
|
||||
const [previewSwitched, setPreviewSwitched] = useState(false)
|
||||
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
|
||||
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
|
||||
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
|
||||
@@ -145,9 +155,9 @@ const StepTwo = ({
|
||||
}
|
||||
}
|
||||
|
||||
const fetchFileIndexingEstimate = async () => {
|
||||
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-use-before-define
|
||||
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams())
|
||||
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm))
|
||||
if (segmentationType === SegmentType.CUSTOM)
|
||||
setCustomFileIndexingEstimate(res)
|
||||
|
||||
@@ -155,10 +165,11 @@ const StepTwo = ({
|
||||
setAutomaticFileIndexingEstimate(res)
|
||||
}
|
||||
|
||||
const confirmChangeCustomConfig = async () => {
|
||||
const confirmChangeCustomConfig = () => {
|
||||
setCustomFileIndexingEstimate(null)
|
||||
setShowPreview()
|
||||
await fetchFileIndexingEstimate()
|
||||
fetchFileIndexingEstimate()
|
||||
setPreviewSwitched(false)
|
||||
}
|
||||
|
||||
const getIndexing_technique = () => indexingType || indexType
|
||||
@@ -205,7 +216,7 @@ const StepTwo = ({
|
||||
}) as NotionInfo[]
|
||||
}
|
||||
|
||||
const getFileIndexingEstimateParams = () => {
|
||||
const getFileIndexingEstimateParams = (docForm: DocForm) => {
|
||||
let params
|
||||
if (dataSourceType === DataSourceType.FILE) {
|
||||
params = {
|
||||
@@ -217,6 +228,7 @@ const StepTwo = ({
|
||||
},
|
||||
indexing_technique: getIndexing_technique(),
|
||||
process_rule: getProcessRule(),
|
||||
doc_form: docForm,
|
||||
}
|
||||
}
|
||||
if (dataSourceType === DataSourceType.NOTION) {
|
||||
@@ -227,6 +239,7 @@ const StepTwo = ({
|
||||
},
|
||||
indexing_technique: getIndexing_technique(),
|
||||
process_rule: getProcessRule(),
|
||||
doc_form: docForm,
|
||||
}
|
||||
}
|
||||
return params
|
||||
@@ -237,6 +250,7 @@ const StepTwo = ({
|
||||
if (isSetting) {
|
||||
params = {
|
||||
original_document_id: documentDetail?.id,
|
||||
doc_form: docForm,
|
||||
process_rule: getProcessRule(),
|
||||
} as CreateDocumentReq
|
||||
}
|
||||
@@ -250,6 +264,7 @@ const StepTwo = ({
|
||||
},
|
||||
indexing_technique: getIndexing_technique(),
|
||||
process_rule: getProcessRule(),
|
||||
doc_form: docForm,
|
||||
} as CreateDocumentReq
|
||||
if (dataSourceType === DataSourceType.FILE) {
|
||||
params.data_source.info_list.file_info_list = {
|
||||
@@ -325,6 +340,29 @@ const StepTwo = ({
|
||||
}
|
||||
}
|
||||
|
||||
const handleSwitch = (state: boolean) => {
|
||||
if (state)
|
||||
setDocForm(DocForm.QA)
|
||||
else
|
||||
setDocForm(DocForm.TEXT)
|
||||
}
|
||||
|
||||
const changeToEconomicalType = () => {
|
||||
if (!hasSetIndexType) {
|
||||
setIndexType(IndexingType.ECONOMICAL)
|
||||
setDocForm(DocForm.TEXT)
|
||||
}
|
||||
}
|
||||
|
||||
const previewSwitch = async () => {
|
||||
setPreviewSwitched(true)
|
||||
if (segmentationType === SegmentType.AUTO)
|
||||
setAutomaticFileIndexingEstimate(null)
|
||||
else
|
||||
setCustomFileIndexingEstimate(null)
|
||||
await fetchFileIndexingEstimate(DocForm.QA)
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
// fetch rules
|
||||
if (!isSetting) {
|
||||
@@ -352,6 +390,11 @@ const StepTwo = ({
|
||||
}
|
||||
}, [showPreview])
|
||||
|
||||
useEffect(() => {
|
||||
if (indexingType === IndexingType.ECONOMICAL && docForm === DocForm.QA)
|
||||
setDocForm(DocForm.TEXT)
|
||||
}, [indexingType, docForm])
|
||||
|
||||
useEffect(() => {
|
||||
// get indexing type by props
|
||||
if (indexingType)
|
||||
@@ -366,10 +409,12 @@ const StepTwo = ({
|
||||
setAutomaticFileIndexingEstimate(null)
|
||||
setShowPreview()
|
||||
fetchFileIndexingEstimate()
|
||||
setPreviewSwitched(false)
|
||||
}
|
||||
else {
|
||||
hidePreview()
|
||||
setCustomFileIndexingEstimate(null)
|
||||
setPreviewSwitched(false)
|
||||
}
|
||||
}, [segmentationType, indexType])
|
||||
|
||||
@@ -508,7 +553,7 @@ const StepTwo = ({
|
||||
hasSetIndexType && s.disabled,
|
||||
hasSetIndexType && '!w-full',
|
||||
)}
|
||||
onClick={() => !hasSetIndexType && setIndexType(IndexingType.ECONOMICAL)}
|
||||
onClick={changeToEconomicalType}
|
||||
>
|
||||
<span className={cn(s.typeIcon, s.economical)} />
|
||||
{!hasSetIndexType && <span className={cn(s.radio)} />}
|
||||
@@ -527,6 +572,24 @@ const StepTwo = ({
|
||||
<Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
|
||||
</div>
|
||||
)}
|
||||
{indexType === IndexingType.QUALIFIED && (
|
||||
<div className='flex justify-between items-center mt-3 px-5 py-4 rounded-xl bg-gray-50 border border-gray-100'>
|
||||
<div className='flex justify-center items-center w-8 h-8 rounded-lg bg-indigo-50'>
|
||||
<MessageChatSquare className='w-4 h-4' />
|
||||
</div>
|
||||
<div className='grow mx-3'>
|
||||
<div className='mb-[2px] text-md font-medium text-gray-900'>{t('datasetCreation.stepTwo.QATitle')}</div>
|
||||
<div className='text-[13px] leading-[18px] text-gray-500'>{t('datasetCreation.stepTwo.QATip')}</div>
|
||||
</div>
|
||||
<div className='shrink-0'>
|
||||
<Switch
|
||||
defaultValue={docForm === DocForm.QA}
|
||||
onChange={handleSwitch}
|
||||
size='md'
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
<div className={s.source}>
|
||||
<div className={s.sourceContent}>
|
||||
{dataSourceType === DataSourceType.FILE && (
|
||||
@@ -602,23 +665,50 @@ const StepTwo = ({
|
||||
{(showPreview)
|
||||
? (
|
||||
<div ref={previewScrollRef} className={cn(s.previewWrap, 'relativeh-full overflow-y-scroll border-l border-[#F2F4F7]')}>
|
||||
<div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`, ' flex items-center justify-between px-8')}>
|
||||
<span>{t('datasetCreation.stepTwo.previewTitle')}</span>
|
||||
<div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
|
||||
<XMarkIcon className='h-4 w-4'></XMarkIcon>
|
||||
<div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`)}>
|
||||
<div className='flex items-center justify-between px-8'>
|
||||
<div className='grow flex items-center'>
|
||||
<div>{t('datasetCreation.stepTwo.previewTitle')}</div>
|
||||
{docForm === DocForm.QA && !previewSwitched && (
|
||||
<Button className='ml-2 !h-[26px] !py-[3px] !px-2 !text-xs !font-medium !text-primary-600' onClick={previewSwitch}>{t('datasetCreation.stepTwo.previewButton')}</Button>
|
||||
)}
|
||||
</div>
|
||||
<div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
|
||||
<XMarkIcon className='h-4 w-4'></XMarkIcon>
|
||||
</div>
|
||||
</div>
|
||||
{docForm === DocForm.QA && !previewSwitched && (
|
||||
<div className='px-8 pr-12 text-xs text-gray-500'>
|
||||
<span>{t('datasetCreation.stepTwo.previewSwitchTipStart')}</span>
|
||||
<span className='text-amber-600'>{t('datasetCreation.stepTwo.previewSwitchTipEnd')}</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div className='my-4 px-8 space-y-4'>
|
||||
{fileIndexingEstimate?.preview
|
||||
? (
|
||||
<>
|
||||
{fileIndexingEstimate?.preview.map((item, index) => (
|
||||
<PreviewItem key={item} content={item} index={index + 1} />
|
||||
))}
|
||||
</>
|
||||
)
|
||||
: <div className='flex items-center justify-center h-[200px]'><Loading type='area'></Loading></div>
|
||||
}
|
||||
{previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && (
|
||||
<>
|
||||
{fileIndexingEstimate?.qa_preview.map((item, index) => (
|
||||
<PreviewItem type={PreviewType.QA} key={item.question} qa={item} index={index + 1} />
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
{(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && (
|
||||
<>
|
||||
{fileIndexingEstimate?.preview.map((item, index) => (
|
||||
<PreviewItem type={PreviewType.TEXT} key={item} content={item} index={index + 1} />
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
{previewSwitched && docForm === DocForm.QA && !fileIndexingEstimate?.qa_preview && (
|
||||
<div className='flex items-center justify-center h-[200px]'>
|
||||
<Loading type='area' />
|
||||
</div>
|
||||
)}
|
||||
{!previewSwitched && !fileIndexingEstimate?.preview && (
|
||||
<div className='flex items-center justify-center h-[200px]'>
|
||||
<Loading type='area' />
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
|
@@ -1,10 +1,21 @@
|
||||
'use client'
|
||||
import React, { FC } from 'react'
|
||||
import type { FC } from 'react'
|
||||
import React from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
||||
export interface IPreviewItemProps {
|
||||
export type IPreviewItemProps = {
|
||||
type: string
|
||||
index: number
|
||||
content: string
|
||||
content?: string
|
||||
qa?: {
|
||||
answer: string
|
||||
question: string
|
||||
}
|
||||
}
|
||||
|
||||
export enum PreviewType {
|
||||
TEXT = 'text',
|
||||
QA = 'QA',
|
||||
}
|
||||
|
||||
const sharpIcon = (
|
||||
@@ -21,12 +32,16 @@ const textIcon = (
|
||||
)
|
||||
|
||||
const PreviewItem: FC<IPreviewItemProps> = ({
|
||||
type = PreviewType.TEXT,
|
||||
index,
|
||||
content,
|
||||
qa,
|
||||
}) => {
|
||||
const { t } = useTranslation()
|
||||
const charNums = (content || '').length
|
||||
const formatedIndex = (() => (index + '').padStart(3, '0'))()
|
||||
const charNums = type === PreviewType.TEXT
|
||||
? (content || '').length
|
||||
: (qa?.answer || '').length + (qa?.question || '').length
|
||||
const formatedIndex = (() => String(index).padStart(3, '0'))()
|
||||
|
||||
return (
|
||||
<div className='p-4 rounded-xl bg-gray-50'>
|
||||
@@ -41,7 +56,21 @@ const PreviewItem: FC<IPreviewItemProps> = ({
|
||||
</div>
|
||||
</div>
|
||||
<div className='mt-2 max-h-[120px] line-clamp-6 overflow-hidden text-sm text-gray-800'>
|
||||
<div style={{ whiteSpace: 'pre-line'}}>{content}</div>
|
||||
{type === PreviewType.TEXT && (
|
||||
<div style={{ whiteSpace: 'pre-line' }}>{content}</div>
|
||||
)}
|
||||
{type === PreviewType.QA && (
|
||||
<div style={{ whiteSpace: 'pre-line' }}>
|
||||
<div className='flex'>
|
||||
<div className='shrink-0 mr-2 text-medium text-gray-400'>Q</div>
|
||||
<div style={{ whiteSpace: 'pre-line' }}>{qa?.question}</div>
|
||||
</div>
|
||||
<div className='flex'>
|
||||
<div className='shrink-0 mr-2 text-medium text-gray-400'>A</div>
|
||||
<div style={{ whiteSpace: 'pre-line' }}>{qa?.answer}</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
|
Reference in New Issue
Block a user