fix: Fix parent child retrieval issues (#12206)

Co-authored-by: NFish <douxc512@gmail.com>
Co-authored-by: nite-knite <nkCoding@gmail.com>
This commit is contained in:
Wu Tianwei
2025-01-02 16:07:21 +08:00
committed by GitHub
parent 68757950ce
commit 09d759d196
34 changed files with 446 additions and 387 deletions

View File

@@ -6,14 +6,10 @@ import type {
import { RerankingModeEnum } from '@/models/datasets'
export const isReRankModelSelected = ({
rerankDefaultModel,
isRerankDefaultModelValid,
retrievalConfig,
rerankModelList,
indexMethod,
}: {
rerankDefaultModel?: DefaultModelResponse
isRerankDefaultModelValid: boolean
retrievalConfig: RetrievalConfig
rerankModelList: Model[]
indexMethod?: string
@@ -25,12 +21,17 @@ export const isReRankModelSelected = ({
return provider?.models.find(({ model }) => model === retrievalConfig.reranking_model?.reranking_model_name)
}
if (isRerankDefaultModelValid)
return !!rerankDefaultModel
return false
})()
if (
indexMethod === 'high_quality'
&& ([RETRIEVE_METHOD.semantic, RETRIEVE_METHOD.fullText].includes(retrievalConfig.search_method))
&& retrievalConfig.reranking_enable
&& !rerankModelSelected
)
return false
if (
indexMethod === 'high_quality'
&& (retrievalConfig.search_method === RETRIEVE_METHOD.hybrid && retrievalConfig.reranking_mode !== RerankingModeEnum.WeightedScore)

View File

@@ -10,11 +10,13 @@ import { RETRIEVE_METHOD } from '@/types/app'
import type { RetrievalConfig } from '@/types/app'
type Props = {
disabled?: boolean
value: RetrievalConfig
onChange: (value: RetrievalConfig) => void
}
const EconomicalRetrievalMethodConfig: FC<Props> = ({
disabled = false,
value,
onChange,
}) => {
@@ -22,7 +24,8 @@ const EconomicalRetrievalMethodConfig: FC<Props> = ({
return (
<div className='space-y-2'>
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
<OptionCard
disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
title={t('dataset.retrieval.invertedIndex.title')}
description={t('dataset.retrieval.invertedIndex.description')} isActive
activeHeaderClassName='bg-dataset-option-card-purple-gradient'

View File

@@ -1,6 +1,6 @@
'use client'
import type { FC } from 'react'
import React from 'react'
import React, { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import Image from 'next/image'
import RetrievalParamConfig from '../retrieval-param-config'
@@ -10,7 +10,7 @@ import { retrievalIcon } from '../../create/icons'
import type { RetrievalConfig } from '@/types/app'
import { RETRIEVE_METHOD } from '@/types/app'
import { useProviderContext } from '@/context/provider-context'
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import {
DEFAULT_WEIGHTED_SCORE,
@@ -20,54 +20,87 @@ import {
import Badge from '@/app/components/base/badge'
type Props = {
disabled?: boolean
value: RetrievalConfig
onChange: (value: RetrievalConfig) => void
}
const RetrievalMethodConfig: FC<Props> = ({
value: passValue,
disabled = false,
value,
onChange,
}) => {
const { t } = useTranslation()
const { supportRetrievalMethods } = useProviderContext()
const { data: rerankDefaultModel } = useDefaultModel(ModelTypeEnum.rerank)
const value = (() => {
if (!passValue.reranking_model.reranking_model_name) {
return {
...passValue,
reranking_model: {
reranking_provider_name: rerankDefaultModel?.provider.provider || '',
reranking_model_name: rerankDefaultModel?.model || '',
},
reranking_mode: passValue.reranking_mode || (rerankDefaultModel ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore),
weights: passValue.weights || {
weight_type: WeightedScoreEnum.Customized,
vector_setting: {
vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic,
embedding_provider_name: '',
embedding_model_name: '',
},
keyword_setting: {
keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword,
},
},
}
const {
defaultModel: rerankDefaultModel,
currentModel: isRerankDefaultModelValid,
} = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
const onSwitch = useCallback((retrieveMethod: RETRIEVE_METHOD) => {
if ([RETRIEVE_METHOD.semantic, RETRIEVE_METHOD.fullText].includes(retrieveMethod)) {
onChange({
...value,
search_method: retrieveMethod,
...(!value.reranking_model.reranking_model_name
? {
reranking_model: {
reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider?.provider ?? '' : '',
reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
},
reranking_enable: !!isRerankDefaultModelValid,
}
: {
reranking_enable: true,
}),
})
}
return passValue
})()
if (retrieveMethod === RETRIEVE_METHOD.hybrid) {
onChange({
...value,
search_method: retrieveMethod,
...(!value.reranking_model.reranking_model_name
? {
reranking_model: {
reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider?.provider ?? '' : '',
reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
},
reranking_enable: !!isRerankDefaultModelValid,
reranking_mode: isRerankDefaultModelValid ? RerankingModeEnum.RerankingModel : RerankingModeEnum.WeightedScore,
}
: {
reranking_enable: true,
reranking_mode: RerankingModeEnum.RerankingModel,
}),
...(!value.weights
? {
weights: {
weight_type: WeightedScoreEnum.Customized,
vector_setting: {
vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic,
embedding_provider_name: '',
embedding_model_name: '',
},
keyword_setting: {
keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword,
},
},
}
: {}),
})
}
}, [value, rerankDefaultModel, isRerankDefaultModelValid, onChange])
return (
<div className='space-y-2'>
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
<OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
title={t('dataset.retrieval.semantic_search.title')}
description={t('dataset.retrieval.semantic_search.description')}
isActive={
value.search_method === RETRIEVE_METHOD.semantic
}
onSwitched={() => onChange({
...value,
search_method: RETRIEVE_METHOD.semantic,
})}
onSwitched={() => onSwitch(RETRIEVE_METHOD.semantic)}
effectImg={Effect.src}
activeHeaderClassName='bg-dataset-option-card-purple-gradient'
>
@@ -78,17 +111,14 @@ const RetrievalMethodConfig: FC<Props> = ({
/>
</OptionCard>
)}
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
{supportRetrievalMethods.includes(RETRIEVE_METHOD.fullText) && (
<OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
title={t('dataset.retrieval.full_text_search.title')}
description={t('dataset.retrieval.full_text_search.description')}
isActive={
value.search_method === RETRIEVE_METHOD.fullText
}
onSwitched={() => onChange({
...value,
search_method: RETRIEVE_METHOD.fullText,
})}
onSwitched={() => onSwitch(RETRIEVE_METHOD.fullText)}
effectImg={Effect.src}
activeHeaderClassName='bg-dataset-option-card-purple-gradient'
>
@@ -99,8 +129,8 @@ const RetrievalMethodConfig: FC<Props> = ({
/>
</OptionCard>
)}
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
{supportRetrievalMethods.includes(RETRIEVE_METHOD.hybrid) && (
<OptionCard disabled={disabled} icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
title={
<div className='flex items-center space-x-1'>
<div>{t('dataset.retrieval.hybrid_search.title')}</div>
@@ -110,11 +140,7 @@ const RetrievalMethodConfig: FC<Props> = ({
description={t('dataset.retrieval.hybrid_search.description')} isActive={
value.search_method === RETRIEVE_METHOD.hybrid
}
onSwitched={() => onChange({
...value,
search_method: RETRIEVE_METHOD.hybrid,
reranking_enable: true,
})}
onSwitched={() => onSwitch(RETRIEVE_METHOD.hybrid)}
effectImg={Effect.src}
activeHeaderClassName='bg-dataset-option-card-purple-gradient'
>

View File

@@ -1,6 +1,6 @@
'use client'
import type { FC } from 'react'
import React, { useCallback } from 'react'
import React, { useCallback, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import Image from 'next/image'
@@ -39,8 +39,8 @@ const RetrievalParamConfig: FC<Props> = ({
const { t } = useTranslation()
const canToggleRerankModalEnable = type !== RETRIEVE_METHOD.hybrid
const isEconomical = type === RETRIEVE_METHOD.invertedIndex
const isHybridSearch = type === RETRIEVE_METHOD.hybrid
const {
defaultModel: rerankDefaultModel,
modelList: rerankModelList,
} = useModelListAndDefaultModel(ModelTypeEnum.rerank)
@@ -48,35 +48,28 @@ const RetrievalParamConfig: FC<Props> = ({
currentModel,
} = useCurrentProviderAndModel(
rerankModelList,
rerankDefaultModel
? {
...rerankDefaultModel,
provider: rerankDefaultModel.provider.provider,
}
: undefined,
{
provider: value.reranking_model?.reranking_provider_name ?? '',
model: value.reranking_model?.reranking_model_name ?? '',
},
)
const handleDisabledSwitchClick = useCallback(() => {
if (!currentModel)
const handleDisabledSwitchClick = useCallback((enable: boolean) => {
if (enable && !currentModel)
Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
}, [currentModel, rerankDefaultModel, t])
onChange({
...value,
reranking_enable: enable,
})
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [currentModel, onChange, value])
const isHybridSearch = type === RETRIEVE_METHOD.hybrid
const rerankModel = (() => {
if (value.reranking_model) {
return {
provider_name: value.reranking_model.reranking_provider_name,
model_name: value.reranking_model.reranking_model_name,
}
const rerankModel = useMemo(() => {
return {
provider_name: value.reranking_model.reranking_provider_name,
model_name: value.reranking_model.reranking_model_name,
}
else if (rerankDefaultModel) {
return {
provider_name: rerankDefaultModel.provider.provider,
model_name: rerankDefaultModel.model,
}
}
})()
}, [value.reranking_model])
const handleChangeRerankMode = (v: RerankingModeEnum) => {
if (v === value.reranking_mode)
@@ -100,6 +93,8 @@ const RetrievalParamConfig: FC<Props> = ({
},
}
}
if (v === RerankingModeEnum.RerankingModel && !currentModel)
Toast.notify({ type: 'error', message: t('workflow.errorMsg.rerankModelRequired') })
onChange(result)
}
@@ -122,22 +117,11 @@ const RetrievalParamConfig: FC<Props> = ({
<div>
<div className='flex items-center space-x-2 mb-2'>
{canToggleRerankModalEnable && (
<div
className='flex items-center'
onClick={handleDisabledSwitchClick}
>
<Switch
size='md'
defaultValue={currentModel ? value.reranking_enable : false}
onChange={(v) => {
onChange({
...value,
reranking_enable: v,
})
}}
disabled={!currentModel}
/>
</div>
<Switch
size='md'
defaultValue={value.reranking_enable}
onChange={handleDisabledSwitchClick}
/>
)}
<div className='flex items-center'>
<span className='mr-0.5 system-sm-semibold text-text-secondary'>{t('common.modelProvider.rerankModel.key')}</span>
@@ -148,21 +132,23 @@ const RetrievalParamConfig: FC<Props> = ({
/>
</div>
</div>
<ModelSelector
triggerClassName={`${!value.reranking_enable && '!opacity-60 !cursor-not-allowed'}`}
defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
modelList={rerankModelList}
readonly={!value.reranking_enable}
onSelect={(v) => {
onChange({
...value,
reranking_model: {
reranking_provider_name: v.provider,
reranking_model_name: v.model,
},
})
}}
/>
{
value.reranking_enable && (
<ModelSelector
defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
modelList={rerankModelList}
onSelect={(v) => {
onChange({
...value,
reranking_model: {
reranking_provider_name: v.provider,
reranking_model_name: v.model,
},
})
}}
/>
)
}
</div>
)}
{
@@ -255,10 +241,8 @@ const RetrievalParamConfig: FC<Props> = ({
{
value.reranking_mode !== RerankingModeEnum.WeightedScore && (
<ModelSelector
triggerClassName={`${!value.reranking_enable && '!opacity-60 !cursor-not-allowed'}`}
defaultModel={rerankModel && { provider: rerankModel.provider_name, model: rerankModel.model_name }}
modelList={rerankModelList}
readonly={!value.reranking_enable}
onSelect={(v) => {
onChange({
...value,

View File

@@ -30,6 +30,7 @@ import { useProviderContext } from '@/context/provider-context'
import { sleep } from '@/utils'
import { RETRIEVE_METHOD } from '@/types/app'
import Tooltip from '@/app/components/base/tooltip'
import { useInvalidDocumentList } from '@/service/knowledge/use-document'
type Props = {
datasetId: string
@@ -207,7 +208,9 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
})
const router = useRouter()
const invalidDocumentList = useInvalidDocumentList()
const navToDocumentList = () => {
invalidDocumentList()
router.push(`/datasets/${datasetId}/documents`)
}
const navToApiDocs = () => {

View File

@@ -31,17 +31,17 @@ import LanguageSelect from './language-select'
import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs'
import cn from '@/utils/classnames'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DocumentItem, FullDocumentDetail, ParentMode, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import { ChunkingMode, DataSourceType, ProcessMode } from '@/models/datasets'
import Button from '@/app/components/base/button'
import FloatRightContainer from '@/app/components/base/float-right-container'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
import { type RetrievalConfig } from '@/types/app'
import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import Toast from '@/app/components/base/toast'
import type { NotionPage } from '@/models/common'
import { DataSourceProvider } from '@/models/common'
import { ChunkingMode, DataSourceType, RerankingModeEnum } from '@/models/datasets'
import { useDatasetDetailContext } from '@/context/dataset-detail'
import I18n from '@/context/i18n'
import { RETRIEVE_METHOD } from '@/types/app'
@@ -90,17 +90,13 @@ type StepTwoProps = {
onCancel?: () => void
}
export enum SegmentType {
AUTO = 'automatic',
CUSTOM = 'custom',
}
/**
 * Indexing technique identifiers used by the dataset creation step.
 * The string values ('high_quality' / 'economy') are the raw values carried
 * in the index-method fields, so they must not be changed casually.
 */
export enum IndexingType {
QUALIFIED = 'high_quality',
ECONOMICAL = 'economy',
}
const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
const DEFAULT_MAXMIMUM_CHUNK_LENGTH = 500
const DEFAULT_MAXIMUM_CHUNK_LENGTH = 500
const DEFAULT_OVERLAP = 50
type ParentChildConfig = {
@@ -131,7 +127,6 @@ const StepTwo = ({
isSetting,
documentDetail,
isAPIKeySet,
onSetting,
datasetId,
indexingType,
dataSourceType: inCreatePageDataSourceType,
@@ -162,12 +157,12 @@ const StepTwo = ({
const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.CUSTOM)
const [segmentationType, setSegmentationType] = useState<ProcessMode>(ProcessMode.general)
const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER)
const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => {
doSetSegmentIdentifier(value ? escape(value) : (canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER))
}, [])
const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXMIMUM_CHUNK_LENGTH) // default chunk length
const [maxChunkLength, setMaxChunkLength] = useState(DEFAULT_MAXIMUM_CHUNK_LENGTH) // default chunk length
const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000)
const [overlap, setOverlap] = useState(DEFAULT_OVERLAP)
const [rules, setRules] = useState<PreProcessingRule[]>([])
@@ -198,7 +193,6 @@ const StepTwo = ({
)
// QA Related
const [isLanguageSelectDisabled, _setIsLanguageSelectDisabled] = useState(false)
const [isQAConfirmDialogOpen, setIsQAConfirmDialogOpen] = useState(false)
const [docForm, setDocForm] = useState<ChunkingMode>(
(datasetId && documentDetail) ? documentDetail.doc_form as ChunkingMode : ChunkingMode.text,
@@ -348,7 +342,7 @@ const StepTwo = ({
}
const updatePreview = () => {
if (segmentationType === SegmentType.CUSTOM && maxChunkLength > 4000) {
if (segmentationType === ProcessMode.general && maxChunkLength > 4000) {
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
return
}
@@ -373,13 +367,42 @@ const StepTwo = ({
model: defaultEmbeddingModel?.model || '',
},
)
// Retrieval settings edited in this step.
// Initial value: the retrieval config already stored on the current dataset
// (`retrieval_model_dict`) when there is one; otherwise a fallback of semantic
// search with reranking disabled, no rerank model selected, top_k = 3 and the
// score threshold switched off (threshold value 0.5).
const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || {
search_method: RETRIEVE_METHOD.semantic,
reranking_enable: false,
reranking_model: {
reranking_provider_name: '',
reranking_model_name: '',
},
top_k: 3,
score_threshold_enabled: false,
score_threshold: 0.5,
} as RetrievalConfig)
// Re-seed the retrieval config from the default rerank model.
// Skipped entirely when the current dataset already carries a stored
// `retrieval_model_dict`. Otherwise the whole config is replaced with the
// semantic-search defaults; reranking is enabled and the rerank provider/model
// are filled in only when `isRerankDefaultModelValid` is truthy.
// NOTE(review): this replaces the entire state object each time
// `rerankDefaultModel` or `isRerankDefaultModelValid` changes, so any edits made
// to `retrievalConfig` before those values change would be overwritten — confirm
// this is intended.
useEffect(() => {
if (currentDataset?.retrieval_model_dict)
return
setRetrievalConfig({
search_method: RETRIEVE_METHOD.semantic,
reranking_enable: !!isRerankDefaultModelValid,
reranking_model: {
// Empty strings mean "no rerank model selected".
reranking_provider_name: isRerankDefaultModelValid ? rerankDefaultModel?.provider.provider ?? '' : '',
reranking_model_name: isRerankDefaultModelValid ? rerankDefaultModel?.model ?? '' : '',
},
top_k: 3,
score_threshold_enabled: false,
score_threshold: 0.5,
})
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [rerankDefaultModel, isRerankDefaultModelValid])
const getCreationParams = () => {
let params
if (segmentationType === SegmentType.CUSTOM && overlap > maxChunkLength) {
if (segmentationType === ProcessMode.general && overlap > maxChunkLength) {
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') })
return
}
if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
if (segmentationType === ProcessMode.general && maxChunkLength > limitMaxChunkLength) {
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
return
}
@@ -389,7 +412,6 @@ const StepTwo = ({
doc_form: currentDocForm,
doc_language: docLanguage,
process_rule: getProcessRule(),
// eslint-disable-next-line @typescript-eslint/no-use-before-define
retrieval_model: retrievalConfig, // Readonly. To change it, go to the settings page.
embedding_model: embeddingModel.model, // Readonly
embedding_model_provider: embeddingModel.provider, // Readonly
@@ -400,10 +422,7 @@ const StepTwo = ({
const indexMethod = getIndexing_technique()
if (
!isReRankModelSelected({
rerankDefaultModel,
isRerankDefaultModelValid: !!isRerankDefaultModelValid,
rerankModelList,
// eslint-disable-next-line @typescript-eslint/no-use-before-define
retrievalConfig,
indexMethod: indexMethod as string,
})
@@ -411,16 +430,6 @@ const StepTwo = ({
Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
return
}
const postRetrievalConfig = ensureRerankModelSelected({
rerankDefaultModel: rerankDefaultModel!,
retrievalConfig: {
// eslint-disable-next-line @typescript-eslint/no-use-before-define
...retrievalConfig,
// eslint-disable-next-line @typescript-eslint/no-use-before-define
reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
},
indexMethod: indexMethod as string,
})
params = {
data_source: {
type: dataSourceType,
@@ -432,8 +441,7 @@ const StepTwo = ({
process_rule: getProcessRule(),
doc_form: currentDocForm,
doc_language: docLanguage,
retrieval_model: postRetrievalConfig,
retrieval_model: retrievalConfig,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
} as CreateDocumentReq
@@ -490,7 +498,6 @@ const StepTwo = ({
const getDefaultMode = () => {
if (documentDetail)
// @ts-expect-error fix after api refactored
setSegmentationType(documentDetail.dataset_process_rule.mode)
}
@@ -525,7 +532,6 @@ const StepTwo = ({
onSuccess(data) {
updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
updateResultCache && updateResultCache(data)
// eslint-disable-next-line @typescript-eslint/no-use-before-define
updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string)
},
},
@@ -545,14 +551,6 @@ const StepTwo = ({
isSetting && onSave && onSave()
}
const changeToEconomicalType = () => {
if (docForm !== ChunkingMode.text)
return
if (!hasSetIndexType)
setIndexType(IndexingType.ECONOMICAL)
}
useEffect(() => {
// fetch rules
if (!isSetting) {
@@ -574,18 +572,6 @@ const StepTwo = ({
setIndexType(isAPIKeySet ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL)
}, [isAPIKeySet, indexingType, datasetId])
const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict || {
search_method: RETRIEVE_METHOD.semantic,
reranking_enable: false,
reranking_model: {
reranking_provider_name: rerankDefaultModel?.provider.provider,
reranking_model_name: rerankDefaultModel?.model,
},
top_k: 3,
score_threshold_enabled: false,
score_threshold: 0.5,
} as RetrievalConfig)
const economyDomRef = useRef<HTMLDivElement>(null)
const isHoveringEconomy = useHover(economyDomRef)
@@ -984,12 +970,14 @@ const StepTwo = ({
getIndexing_technique() === IndexingType.QUALIFIED
? (
<RetrievalMethodConfig
disabled={!!datasetId}
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)
: (
<EconomicalRetrievalMethodConfig
disabled={!!datasetId}
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
@@ -1010,7 +998,7 @@ const StepTwo = ({
)
: (
<div className='flex items-center mt-8 py-2'>
<Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
{!datasetId && <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>}
<Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
</div>
)}
@@ -1081,11 +1069,11 @@ const StepTwo = ({
}
{
currentDocForm !== ChunkingMode.qa
&& <Badge text={t(
'datasetCreation.stepTwo.previewChunkCount', {
count: estimate?.total_segments || 0,
}) as string}
/>
&& <Badge text={t(
'datasetCreation.stepTwo.previewChunkCount', {
count: estimate?.total_segments || 0,
}) as string}
/>
}
</div>
</PreviewHeader>}

View File

@@ -4,7 +4,7 @@ import classNames from '@/utils/classnames'
const TriangleArrow: FC<ComponentProps<'svg'>> = props => (
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="11" viewBox="0 0 24 11" fill="none" {...props}>
<path d="M9.87868 1.12132C11.0503 -0.0502525 12.9497 -0.0502525 14.1213 1.12132L23.3137 10.3137H0.686292L9.87868 1.12132Z" fill="currentColor"/>
<path d="M9.87868 1.12132C11.0503 -0.0502525 12.9497 -0.0502525 14.1213 1.12132L23.3137 10.3137H0.686292L9.87868 1.12132Z" fill="currentColor" />
</svg>
)
@@ -65,7 +65,7 @@ export const OptionCard: FC<OptionCardProps> = forwardRef((props, ref) => {
(isActive && !noHighlight)
? 'border-[1.5px] border-components-option-card-option-selected-border'
: 'border border-components-option-card-option-border',
disabled && 'opacity-50 cursor-not-allowed',
disabled && 'opacity-50 pointer-events-none',
className,
)}
style={{

View File

@@ -232,6 +232,16 @@ const Completed: FC<ICompletedProps> = ({
setFullScreen(false)
}, [])
// Closes the "new segment" drawer: notifies the parent through
// `onNewSegmentModalChange(false)` and resets the full-screen flag.
// Memoized on `onNewSegmentModalChange`.
const onCloseNewSegmentModal = useCallback(() => {
onNewSegmentModalChange(false)
setFullScreen(false)
}, [onNewSegmentModalChange])
// Closes the "new child chunk" drawer: hides the modal via local state and
// resets the full-screen flag. The dependency list is empty because the
// callback only calls state setters.
const onCloseNewChildChunkModal = useCallback(() => {
setShowNewChildSegmentModal(false)
setFullScreen(false)
}, [])
const { mutateAsync: enableSegment } = useEnableSegment()
const { mutateAsync: disableSegment } = useDisableSegment()
@@ -623,6 +633,7 @@ const Completed: FC<ICompletedProps> = ({
<FullScreenDrawer
isOpen={currSegment.showModal}
fullScreen={fullScreen}
onClose={onCloseSegmentDetail}
>
<SegmentDetail
segInfo={currSegment.segInfo ?? { id: '' }}
@@ -636,13 +647,11 @@ const Completed: FC<ICompletedProps> = ({
<FullScreenDrawer
isOpen={showNewSegmentModal}
fullScreen={fullScreen}
onClose={onCloseNewSegmentModal}
>
<NewSegment
docForm={docForm}
onCancel={() => {
onNewSegmentModalChange(false)
setFullScreen(false)
}}
onCancel={onCloseNewSegmentModal}
onSave={resetList}
viewNewlyAddedChunk={viewNewlyAddedChunk}
/>
@@ -651,6 +660,7 @@ const Completed: FC<ICompletedProps> = ({
<FullScreenDrawer
isOpen={currChildChunk.showModal}
fullScreen={fullScreen}
onClose={onCloseChildSegmentDetail}
>
<ChildSegmentDetail
chunkId={currChunkId}
@@ -664,13 +674,11 @@ const Completed: FC<ICompletedProps> = ({
<FullScreenDrawer
isOpen={showNewChildSegmentModal}
fullScreen={fullScreen}
onClose={onCloseNewChildChunkModal}
>
<NewChildSegment
chunkId={currChunkId}
onCancel={() => {
setShowNewChildSegmentModal(false)
setFullScreen(false)
}}
onCancel={onCloseNewChildChunkModal}
onSave={onSaveNewChildChunk}
viewNewlyAddedChildChunk={viewNewlyAddedChildChunk}
/>

View File

@@ -80,7 +80,7 @@ ref: ForwardedRef<HTMLDivElement>,
checked={selectedSegmentIds.includes(segItem.id)}
onCheck={() => onSelected(segItem.id)}
/>
<div className='grow'>
<div className='grow min-w-0'>
<SegmentCard
key={`${segItem.id}-card`}
detail={segItem}

View File

@@ -22,8 +22,9 @@ import { useDatasetDetailContext } from '@/context/dataset-detail'
import FloatRightContainer from '@/app/components/base/float-right-container'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge'
import { useCheckSegmentBatchImportProgress, useSegmentBatchImport } from '@/service/knowledge/use-segment'
import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document'
import { useInvalid } from '@/service/use-base'
type DocumentContextValue = {
datasetId?: string
@@ -149,11 +150,20 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
const embedding = ['queuing', 'indexing', 'paused'].includes((documentDetail?.display_status || '').toLowerCase())
const invalidChunkList = useInvalid(useSegmentListKey)
const invalidChildChunkList = useInvalid(useChildSegmentListKey)
const handleOperate = (operateName?: string) => {
if (operateName === 'delete')
if (operateName === 'delete') {
backToPrev()
else
}
else {
detailMutate()
setTimeout(() => {
invalidChunkList()
invalidChildChunkList()
}, 5000)
}
}
const mode = useMemo(() => {
@@ -245,7 +255,7 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
<div className='flex flex-row flex-1' style={{ height: 'calc(100% - 4rem)' }}>
{isDetailLoading
? <Loading type='app' />
: <div className={cn('h-full w-full flex flex-col',
: <div className={cn('h-full grow min-w-0 flex flex-col',
embedding ? '' : isFullDocMode ? 'relative pt-4 pr-11 pl-11' : 'relative pt-3 pr-11 pl-5',
)}>
{embedding

View File

@@ -24,6 +24,10 @@ import { DataSourceType } from '@/models/datasets'
import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
import { useProviderContext } from '@/context/provider-context'
import cn from '@/utils/classnames'
import { useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
import { useInvalid } from '@/service/use-base'
import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment'
const FolderPlusIcon = ({ className }: React.SVGProps<SVGElement>) => {
return <svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
<path d="M10.8332 5.83333L9.90355 3.9741C9.63601 3.439 9.50222 3.17144 9.30265 2.97597C9.12615 2.80311 8.91344 2.67164 8.6799 2.59109C8.41581 2.5 8.11668 2.5 7.51841 2.5H4.33317C3.39975 2.5 2.93304 2.5 2.57652 2.68166C2.26292 2.84144 2.00795 3.09641 1.84816 3.41002C1.6665 3.76654 1.6665 4.23325 1.6665 5.16667V5.83333M1.6665 5.83333H14.3332C15.7333 5.83333 16.4334 5.83333 16.9681 6.10582C17.4386 6.3455 17.821 6.72795 18.0607 7.19836C18.3332 7.73314 18.3332 8.4332 18.3332 9.83333V13.5C18.3332 14.9001 18.3332 15.6002 18.0607 16.135C17.821 16.6054 17.4386 16.9878 16.9681 17.2275C16.4334 17.5 15.7333 17.5 14.3332 17.5H5.6665C4.26637 17.5 3.56631 17.5 3.03153 17.2275C2.56112 16.9878 2.17867 16.6054 1.93899 16.135C1.6665 15.6002 1.6665 14.9001 1.6665 13.5V5.83333ZM9.99984 14.1667V9.16667M7.49984 11.6667H12.4998" stroke="#667085" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
@@ -99,7 +103,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' }
}, [currPage, debouncedSearchValue, isDataSourceNotion, limit])
const { data: documentsRes, error, mutate, isLoading: isListLoading } = useSWR(
const { data: documentsRes, mutate, isLoading: isListLoading } = useSWR(
{
action: 'fetchDocuments',
datasetId,
@@ -115,10 +119,20 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
setIsMuting(false)
}, [isListLoading, isMuting])
const invalidDocumentDetail = useInvalidDocumentDetailKey()
const invalidChunkList = useInvalid(useSegmentListKey)
const invalidChildChunkList = useInvalid(useChildSegmentListKey)
const handleUpdate = useCallback(() => {
setIsMuting(true)
mutate()
}, [mutate])
invalidDocumentDetail()
setTimeout(() => {
invalidChunkList()
invalidChildChunkList()
}, 5000)
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
const documentsWithProgress = useMemo(() => {
let completedNum = 0

View File

@@ -133,6 +133,16 @@ export const StatusItem: FC<{
<span className={cn(`${STATUS_TEXT_COLOR_MAP[DOC_INDEX_STATUS_MAP[localStatus].color as keyof typeof STATUS_TEXT_COLOR_MAP]} text-sm`, textCls)}>
{DOC_INDEX_STATUS_MAP[localStatus]?.text}
</span>
{
errorMessage && (
<Tooltip
popupContent={
<div className='max-w-[260px] break-all'>{errorMessage}</div>
}
triggerClassName='ml-1 w-4 h-4'
/>
)
}
{
scene === 'detail' && (
<div className='flex justify-between items-center ml-1.5'>
@@ -152,16 +162,6 @@ export const StatusItem: FC<{
</div>
)
}
{
errorMessage && (
<Tooltip
popupContent={
<div className='max-w-[260px] break-all'>{errorMessage}</div>
}
triggerClassName='ml-1 w-4 h-4'
/>
)
}
</div>
}
@@ -561,18 +561,14 @@ const DocumentList: FC<IDocumentListProps> = ({
</div>
</td>
<td>
<div className={'group flex items-center justify-between mr-6 hover:mr-0'}>
<span className={cn(s.tdValue, 'flex items-center')}>
{doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />
}
<div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
<div className='shrink-0'>
{doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
{doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
{doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />
}
{
doc.name
}
</span>
<div className='group-hover:flex hidden'>
{doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />}
</div>
<span className='text-sm truncate grow-1'>{doc.name}</span>
<div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
<Tooltip
popupContent={t('datasetDocuments.list.table.rename')}
>

View File

@@ -17,7 +17,7 @@ const ChildChunks: FC<Props> = ({
const { id, score, content, position } = payload
return (
<div
className={!isShowAll ? 'line-clamp-2' : ''}
className={!isShowAll ? 'line-clamp-2 break-all' : ''}
>
<div className='inline-flex items-center relative top-[-2px]'>
<div className='flex items-center h-[20.5px] bg-state-accent-solid system-2xs-semibold-uppercase text-text-primary-on-surface px-1'>C-{position}</div>

View File

@@ -56,7 +56,7 @@ const ChunkDetailModal: FC<Props> = ({
</div>
<Score value={score} />
</div>
<div className={cn('mt-2 body-md-regular text-text-secondary', heighClassName)}>
<div className={cn('mt-2 body-md-regular text-text-secondary break-all', heighClassName)}>
{content}
</div>
{!isParentChildRetrieval && keywords && keywords.length > 0 && (

View File

@@ -43,13 +43,8 @@ const ResultItem: FC<Props> = ({
setFalse: hideDetailModal,
}] = useBoolean(false)
const handleClickCard = () => {
if (!isParentChildRetrieval)
showDetailModal()
}
return (
<div className={cn('pt-3 bg-chat-bubble-bg rounded-xl hover:shadow-lg', !isParentChildRetrieval && 'cursor-pointer')} onClick={handleClickCard}>
<div className={cn('pt-3 bg-chat-bubble-bg rounded-xl hover:shadow-lg cursor-pointer')} onClick={showDetailModal}>
{/* Meta info */}
<div className='flex justify-between items-center px-3'>
<div className='flex items-center space-x-2'>
@@ -66,7 +61,7 @@ const ResultItem: FC<Props> = ({
{/* Main */}
<div className='mt-1 px-3'>
<div className='line-clamp-2 body-md-regular'>{content}</div>
<div className='line-clamp-2 body-md-regular break-all'>{content}</div>
{isParentChildRetrieval && (
<div className='mt-1'>
<div className={cn('inline-flex items-center h-6 space-x-0.5 text-text-secondary select-none rounded-lg cursor-pointer', isFold && 'pl-1 bg-[linear-gradient(90deg,_rgba(200,_206,_218,_0.20)_0%,_rgba(200,_206,_218,_0.04)_100%)]')} onClick={toggleFold}>

View File

@@ -12,15 +12,15 @@ const Score: FC<Props> = ({
value,
besideChunkName,
}) => {
if (!value)
if (!value || isNaN(value))
return null
return (
<div className={cn('relative items-center px-[5px] border border-components-progress-bar-border overflow-hidden', besideChunkName ? 'border-l-0 h-[20.5px]' : 'h-[20px] rounded-md')}>
<div className={cn('relative items-center px-[5px] border border-components-progress-bar-border overflow-hidden',
besideChunkName ? 'border-l-0 h-[20.5px]' : 'h-[20px] rounded-md')}>
<div className={cn('absolute top-0 left-0 h-full bg-util-colors-blue-brand-blue-brand-100 border-r-[1.5px] border-components-progress-brand-progress', value === 1 && 'border-r-0')} style={{ width: `${value * 100}%` }} />
<div className={cn('relative flex items-center h-full space-x-0.5 text-util-colors-blue-brand-blue-brand-700')}>
<div className='system-2xs-medium-uppercase'>score</div>
<div className='system-xs-semibold'>{value.toFixed(2)}</div>
<div className='system-xs-semibold'>{value?.toFixed(2)}</div>
</div>
</div>
)

View File

@@ -192,7 +192,7 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
}
</div>
</FloatRightContainer>
<Drawer isOpen={isShowModifyRetrievalModal} onClose={() => setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'>
<Drawer unmount={true} isOpen={isShowModifyRetrievalModal} onClose={() => setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'>
<ModifyRetrievalModal
indexMethod={currentDataset?.indexing_technique || ''}
value={retrievalConfig}

View File

@@ -9,9 +9,8 @@ import type { RetrievalConfig } from '@/types/app'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
import Button from '@/app/components/base/button'
import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { RerankingModeEnum } from '@/models/datasets'
type Props = {
indexMethod: string
@@ -39,15 +38,11 @@ const ModifyRetrievalModal: FC<Props> = ({
const {
modelList: rerankModelList,
defaultModel: rerankDefaultModel,
currentModel: isRerankDefaultModelValid,
} = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
const handleSave = () => {
if (
!isReRankModelSelected({
rerankDefaultModel,
isRerankDefaultModelValid: !!isRerankDefaultModelValid,
rerankModelList,
retrievalConfig,
indexMethod,
@@ -56,14 +51,7 @@ const ModifyRetrievalModal: FC<Props> = ({
Toast.notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
return
}
onSave(ensureRerankModelSelected({
rerankDefaultModel: rerankDefaultModel!,
retrievalConfig: {
...retrievalConfig,
reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
},
indexMethod,
}))
onSave(retrievalConfig)
}
if (!isShow)

View File

@@ -17,11 +17,11 @@ import Input from '@/app/components/base/input'
import Textarea from '@/app/components/base/textarea'
import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
import { updateDatasetSetting } from '@/service/datasets'
import { type DataSetListResponse, RerankingModeEnum } from '@/models/datasets'
import { type DataSetListResponse } from '@/models/datasets'
import DatasetDetailContext from '@/context/dataset-detail'
import { type RetrievalConfig } from '@/types/app'
import { useAppContext } from '@/context/app-context'
import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import { isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
import {
useModelList,
@@ -74,8 +74,6 @@ const Form = () => {
)
const {
modelList: rerankModelList,
defaultModel: rerankDefaultModel,
currentModel: isRerankDefaultModelValid,
} = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
@@ -109,8 +107,6 @@ const Form = () => {
}
if (
!isReRankModelSelected({
rerankDefaultModel,
isRerankDefaultModelValid: !!isRerankDefaultModelValid,
rerankModelList,
retrievalConfig,
indexMethod,
@@ -119,17 +115,9 @@ const Form = () => {
notify({ type: 'error', message: t('appDebug.datasetConfig.rerankModelRequired') })
return
}
const postRetrievalConfig = ensureRerankModelSelected({
rerankDefaultModel: rerankDefaultModel!,
retrievalConfig: {
...retrievalConfig,
reranking_enable: retrievalConfig.reranking_mode === RerankingModeEnum.RerankingModel,
},
indexMethod,
})
if (postRetrievalConfig.weights) {
postRetrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || ''
postRetrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || ''
if (retrievalConfig.weights) {
retrievalConfig.weights.vector_setting.embedding_provider_name = currentDataset?.embedding_model_provider || ''
retrievalConfig.weights.vector_setting.embedding_model_name = currentDataset?.embedding_model || ''
}
try {
setLoading(true)
@@ -141,8 +129,8 @@ const Form = () => {
permission,
indexing_technique: indexMethod,
retrieval_model: {
...postRetrievalConfig,
score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0,
...retrievalConfig,
score_threshold: retrievalConfig.score_threshold_enabled ? retrievalConfig.score_threshold : 0,
},
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,