Feat: Q&A format segmentation support (#668)

Co-authored-by: jyong <718720800@qq.com>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>
This commit is contained in:
KVOJJJin
2023-07-28 20:47:15 +08:00
committed by GitHub
parent aae2fb8a30
commit cf93d8d6e2
52 changed files with 2038 additions and 274 deletions

View File

@@ -291,7 +291,7 @@
}
.source {
@apply flex justify-between items-center mt-8 px-6 py-4 rounded-xl bg-gray-50;
@apply flex justify-between items-center mt-8 px-6 py-4 rounded-xl bg-gray-50 border border-gray-100;
}
.source .divider {

View File

@@ -7,7 +7,7 @@ import { XMarkIcon } from '@heroicons/react/20/solid'
import cn from 'classnames'
import Link from 'next/link'
import { groupBy } from 'lodash-es'
import PreviewItem from './preview-item'
import PreviewItem, { PreviewType } from './preview-item'
import s from './index.module.css'
import type { CreateDocumentReq, File, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
@@ -24,6 +24,8 @@ import { formatNumber } from '@/utils/format'
import type { DataSourceNotionPage } from '@/models/common'
import { DataSourceType } from '@/models/datasets'
import NotionIcon from '@/app/components/base/notion-icon'
import Switch from '@/app/components/base/switch'
import { MessageChatSquare } from '@/app/components/base/icons/src/public/common'
import { useDatasetDetailContext } from '@/context/dataset-detail'
type Page = DataSourceNotionPage & { workspace_id: string }
@@ -53,6 +55,10 @@ enum IndexingType {
QUALIFIED = 'high_quality',
ECONOMICAL = 'economy',
}
/**
 * Document form sent as `doc_form` in the indexing-estimate and
 * document-creation request params built in this component.
 */
enum DocForm {
// Plain text segmentation (the default form).
TEXT = 'text_model',
// Question & answer segmentation.
QA = 'qa_model',
}
const StepTwo = ({
isSetting,
@@ -88,6 +94,10 @@ const StepTwo = ({
? IndexingType.QUALIFIED
: IndexingType.ECONOMICAL,
)
// Selected document form. When editing an existing document (both `datasetId`
// and `documentDetail` are present) it starts from the document's stored
// `doc_form`; otherwise it starts as plain text.
const [docForm, setDocForm] = useState<DocForm | string>(
datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT,
)
// Whether the preview panel has been switched to the Q&A preview. Starts as false.
const [previewSwitched, setPreviewSwitched] = useState(false)
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<IndexingEstimateResponse | null>(null)
@@ -145,9 +155,9 @@ const StepTwo = ({
}
}
const fetchFileIndexingEstimate = async () => {
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
// eslint-disable-next-line @typescript-eslint/no-use-before-define
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams())
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm))
if (segmentationType === SegmentType.CUSTOM)
setCustomFileIndexingEstimate(res)
@@ -155,10 +165,11 @@ const StepTwo = ({
setAutomaticFileIndexingEstimate(res)
}
const confirmChangeCustomConfig = async () => {
const confirmChangeCustomConfig = () => {
setCustomFileIndexingEstimate(null)
setShowPreview()
await fetchFileIndexingEstimate()
fetchFileIndexingEstimate()
setPreviewSwitched(false)
}
const getIndexing_technique = () => indexingType || indexType
@@ -205,7 +216,7 @@ const StepTwo = ({
}) as NotionInfo[]
}
const getFileIndexingEstimateParams = () => {
const getFileIndexingEstimateParams = (docForm: DocForm) => {
let params
if (dataSourceType === DataSourceType.FILE) {
params = {
@@ -217,6 +228,7 @@ const StepTwo = ({
},
indexing_technique: getIndexing_technique(),
process_rule: getProcessRule(),
doc_form: docForm,
}
}
if (dataSourceType === DataSourceType.NOTION) {
@@ -227,6 +239,7 @@ const StepTwo = ({
},
indexing_technique: getIndexing_technique(),
process_rule: getProcessRule(),
doc_form: docForm,
}
}
return params
@@ -237,6 +250,7 @@ const StepTwo = ({
if (isSetting) {
params = {
original_document_id: documentDetail?.id,
doc_form: docForm,
process_rule: getProcessRule(),
} as CreateDocumentReq
}
@@ -250,6 +264,7 @@ const StepTwo = ({
},
indexing_technique: getIndexing_technique(),
process_rule: getProcessRule(),
doc_form: docForm,
} as CreateDocumentReq
if (dataSourceType === DataSourceType.FILE) {
params.data_source.info_list.file_info_list = {
@@ -325,6 +340,29 @@ const StepTwo = ({
}
}
/**
 * Change handler for the Q&A switch.
 *
 * @param state - `true` selects the Q&A document form, `false` selects plain text.
 */
const handleSwitch = (state: boolean) => {
  const nextDocForm = state ? DocForm.QA : DocForm.TEXT
  setDocForm(nextDocForm)
}
/**
 * Click handler for the economical index option.
 *
 * Does nothing when `hasSetIndexType` is truthy. Otherwise it selects the
 * economical index type and puts the document form back to plain text
 * (the Q&A switch is only rendered for the high-quality index type).
 */
const changeToEconomicalType = () => {
  if (hasSetIndexType)
    return

  setIndexType(IndexingType.ECONOMICAL)
  setDocForm(DocForm.TEXT)
}
/**
 * Switches the preview panel to the Q&A preview.
 *
 * Marks the preview as switched, clears the stored estimate for the current
 * segmentation type, then requests a new estimate for the Q&A document form.
 */
const previewSwitch = async () => {
  setPreviewSwitched(true)

  // Only the estimate belonging to the active segmentation type is cleared.
  const clearCurrentEstimate = segmentationType === SegmentType.AUTO
    ? setAutomaticFileIndexingEstimate
    : setCustomFileIndexingEstimate
  clearCurrentEstimate(null)

  await fetchFileIndexingEstimate(DocForm.QA)
}
useEffect(() => {
// fetch rules
if (!isSetting) {
@@ -352,6 +390,11 @@ const StepTwo = ({
}
}, [showPreview])
useEffect(() => {
  // Q&A form combined with the economical indexing type is reset to plain text.
  const isEconomical = indexingType === IndexingType.ECONOMICAL
  const isQAForm = docForm === DocForm.QA
  if (isEconomical && isQAForm)
    setDocForm(DocForm.TEXT)
}, [indexingType, docForm])
useEffect(() => {
// get indexing type by props
if (indexingType)
@@ -366,10 +409,12 @@ const StepTwo = ({
setAutomaticFileIndexingEstimate(null)
setShowPreview()
fetchFileIndexingEstimate()
setPreviewSwitched(false)
}
else {
hidePreview()
setCustomFileIndexingEstimate(null)
setPreviewSwitched(false)
}
}, [segmentationType, indexType])
@@ -508,7 +553,7 @@ const StepTwo = ({
hasSetIndexType && s.disabled,
hasSetIndexType && '!w-full',
)}
onClick={() => !hasSetIndexType && setIndexType(IndexingType.ECONOMICAL)}
onClick={changeToEconomicalType}
>
<span className={cn(s.typeIcon, s.economical)} />
{!hasSetIndexType && <span className={cn(s.radio)} />}
@@ -527,6 +572,24 @@ const StepTwo = ({
<Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
</div>
)}
{indexType === IndexingType.QUALIFIED && (
<div className='flex justify-between items-center mt-3 px-5 py-4 rounded-xl bg-gray-50 border border-gray-100'>
<div className='flex justify-center items-center w-8 h-8 rounded-lg bg-indigo-50'>
<MessageChatSquare className='w-4 h-4' />
</div>
<div className='grow mx-3'>
<div className='mb-[2px] text-md font-medium text-gray-900'>{t('datasetCreation.stepTwo.QATitle')}</div>
<div className='text-[13px] leading-[18px] text-gray-500'>{t('datasetCreation.stepTwo.QATip')}</div>
</div>
<div className='shrink-0'>
<Switch
defaultValue={docForm === DocForm.QA}
onChange={handleSwitch}
size='md'
/>
</div>
</div>
)}
<div className={s.source}>
<div className={s.sourceContent}>
{dataSourceType === DataSourceType.FILE && (
@@ -602,23 +665,50 @@ const StepTwo = ({
{(showPreview)
? (
<div ref={previewScrollRef} className={cn(s.previewWrap, 'relativeh-full overflow-y-scroll border-l border-[#F2F4F7]')}>
<div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`, ' flex items-center justify-between px-8')}>
<span>{t('datasetCreation.stepTwo.previewTitle')}</span>
<div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
<XMarkIcon className='h-4 w-4'></XMarkIcon>
<div className={cn(s.previewHeader, previewScrolled && `${s.fixed} pb-3`)}>
<div className='flex items-center justify-between px-8'>
<div className='grow flex items-center'>
<div>{t('datasetCreation.stepTwo.previewTitle')}</div>
{docForm === DocForm.QA && !previewSwitched && (
<Button className='ml-2 !h-[26px] !py-[3px] !px-2 !text-xs !font-medium !text-primary-600' onClick={previewSwitch}>{t('datasetCreation.stepTwo.previewButton')}</Button>
)}
</div>
<div className='flex items-center justify-center w-6 h-6 cursor-pointer' onClick={hidePreview}>
<XMarkIcon className='h-4 w-4'></XMarkIcon>
</div>
</div>
{docForm === DocForm.QA && !previewSwitched && (
<div className='px-8 pr-12 text-xs text-gray-500'>
<span>{t('datasetCreation.stepTwo.previewSwitchTipStart')}</span>
<span className='text-amber-600'>{t('datasetCreation.stepTwo.previewSwitchTipEnd')}</span>
</div>
)}
</div>
<div className='my-4 px-8 space-y-4'>
{fileIndexingEstimate?.preview
? (
<>
{fileIndexingEstimate?.preview.map((item, index) => (
<PreviewItem key={item} content={item} index={index + 1} />
))}
</>
)
: <div className='flex items-center justify-center h-[200px]'><Loading type='area'></Loading></div>
}
{previewSwitched && docForm === DocForm.QA && fileIndexingEstimate?.qa_preview && (
<>
{fileIndexingEstimate?.qa_preview.map((item, index) => (
<PreviewItem type={PreviewType.QA} key={item.question} qa={item} index={index + 1} />
))}
</>
)}
{(docForm === DocForm.TEXT || !previewSwitched) && fileIndexingEstimate?.preview && (
<>
{fileIndexingEstimate?.preview.map((item, index) => (
<PreviewItem type={PreviewType.TEXT} key={item} content={item} index={index + 1} />
))}
</>
)}
{previewSwitched && docForm === DocForm.QA && !fileIndexingEstimate?.qa_preview && (
<div className='flex items-center justify-center h-[200px]'>
<Loading type='area' />
</div>
)}
{!previewSwitched && !fileIndexingEstimate?.preview && (
<div className='flex items-center justify-center h-[200px]'>
<Loading type='area' />
</div>
)}
</div>
</div>
)

View File

@@ -1,10 +1,21 @@
'use client'
import React, { FC } from 'react'
import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
export interface IPreviewItemProps {
export type IPreviewItemProps = {
type: string
index: number
content: string
content?: string
qa?: {
answer: string
question: string
}
}
/**
 * Kind of content a preview item renders, passed through its `type` prop.
 */
export enum PreviewType {
// Plain text segment; the item renders `content`.
TEXT = 'text',
// Question/answer pair; the item renders `qa.question` and `qa.answer`.
QA = 'QA',
}
const sharpIcon = (
@@ -21,12 +32,16 @@ const textIcon = (
)
const PreviewItem: FC<IPreviewItemProps> = ({
type = PreviewType.TEXT,
index,
content,
qa,
}) => {
const { t } = useTranslation()
const charNums = (content || '').length
const formatedIndex = (() => (index + '').padStart(3, '0'))()
const charNums = type === PreviewType.TEXT
? (content || '').length
: (qa?.answer || '').length + (qa?.question || '').length
const formatedIndex = (() => String(index).padStart(3, '0'))()
return (
<div className='p-4 rounded-xl bg-gray-50'>
@@ -41,7 +56,21 @@ const PreviewItem: FC<IPreviewItemProps> = ({
</div>
</div>
<div className='mt-2 max-h-[120px] line-clamp-6 overflow-hidden text-sm text-gray-800'>
<div style={{ whiteSpace: 'pre-line'}}>{content}</div>
{type === PreviewType.TEXT && (
<div style={{ whiteSpace: 'pre-line' }}>{content}</div>
)}
{type === PreviewType.QA && (
<div style={{ whiteSpace: 'pre-line' }}>
<div className='flex'>
<div className='shrink-0 mr-2 text-medium text-gray-400'>Q</div>
<div style={{ whiteSpace: 'pre-line' }}>{qa?.question}</div>
</div>
<div className='flex'>
<div className='shrink-0 mr-2 text-medium text-gray-400'>A</div>
<div style={{ whiteSpace: 'pre-line' }}>{qa?.answer}</div>
</div>
</div>
)}
</div>
</div>
)