Feat: remove estimation of embedding cost (#7950)

Co-authored-by: jyong <718720800@qq.com>
This commit is contained in:
KVOJJJin
2024-09-04 14:41:47 +08:00
committed by GitHub
parent 83e84865be
commit 14af87527f
14 changed files with 122 additions and 162 deletions

View File

@@ -30,7 +30,7 @@
}
.indexItem {
min-height: 146px;
min-height: 126px;
}
.indexItem .disableMask {
@@ -121,10 +121,6 @@
@apply pb-1;
}
.radioItem.indexItem .typeHeader .tip {
@apply pb-3;
}
.radioItem .typeIcon {
position: absolute;
top: 18px;
@@ -264,7 +260,7 @@
}
.input {
@apply inline-flex h-9 w-full py-1 px-2 rounded-lg text-xs leading-normal;
@apply inline-flex h-9 w-full py-1 px-2 pr-14 rounded-lg text-xs leading-normal;
@apply bg-gray-100 caret-primary-600 hover:bg-gray-100 focus:ring-1 focus:ring-inset focus:ring-gray-200 focus-visible:outline-none focus:bg-white placeholder:text-gray-400;
}

View File

@@ -14,7 +14,7 @@ import PreviewItem, { PreviewType } from './preview-item'
import LanguageSelect from './language-select'
import s from './index.module.css'
import cn from '@/utils/classnames'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
createDocument,
createFirstDocument,
@@ -41,8 +41,10 @@ import { IS_CE_EDITION } from '@/config'
import { RETRIEVE_METHOD } from '@/types/app'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import Tooltip from '@/app/components/base/tooltip'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { useDefaultModel, useModelList, useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { LanguagesSupported } from '@/i18n/language'
import ModelSelector from '@/app/components/header/account-setting/model-provider-page/model-selector'
import type { DefaultModel } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel'
@@ -109,7 +111,7 @@ const StepTwo = ({
const [previewScrolled, setPreviewScrolled] = useState(false)
const [segmentationType, setSegmentationType] = useState<SegmentType>(SegmentType.AUTO)
const [segmentIdentifier, setSegmentIdentifier] = useState('\\n')
const [max, setMax] = useState(500)
const [max, setMax] = useState(5000) // default chunk length
const [overlap, setOverlap] = useState(50)
const [rules, setRules] = useState<PreProcessingRule[]>([])
const [defaultConfig, setDefaultConfig] = useState<Rules>()
@@ -131,7 +133,6 @@ const StepTwo = ({
const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean()
const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState<FileIndexingEstimateResponse | null>(null)
const [estimateTokes, setEstimateTokes] = useState<Pick<IndexingEstimateResponse, 'tokens' | 'total_price'> | null>(null)
const fileIndexingEstimate = (() => {
return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate
@@ -192,13 +193,10 @@ const StepTwo = ({
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => {
// eslint-disable-next-line @typescript-eslint/no-use-before-define
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)!)
if (segmentationType === SegmentType.CUSTOM) {
if (segmentationType === SegmentType.CUSTOM)
setCustomFileIndexingEstimate(res)
}
else {
else
setAutomaticFileIndexingEstimate(res)
indexType === IndexingType.QUALIFIED && setEstimateTokes({ tokens: res.tokens, total_price: res.total_price })
}
}
const confirmChangeCustomConfig = () => {
@@ -310,6 +308,19 @@ const StepTwo = ({
defaultModel: rerankDefaultModel,
currentModel: isRerankDefaultModelVaild,
} = useModelListAndDefaultModelAndCurrentProviderAndModel(ModelTypeEnum.rerank)
// Model list and default model for the text-embedding model type, read through the model-provider hooks.
const { data: embeddingModelList } = useModelList(ModelTypeEnum.textEmbedding)
const { data: defaultEmbeddingModel } = useDefaultModel(ModelTypeEnum.textEmbedding)
// Embedding model held in component state and sent with the creation params.
// Initial value: the current dataset's provider/model when the dataset already has an
// embedding model; otherwise the default embedding model, falling back to empty strings.
// NOTE(review): useState reads this initializer only on the first render. If
// defaultEmbeddingModel is still undefined at that point (presumably it is loaded
// asynchronously -- confirm against useDefaultModel), the state keeps the empty-string
// fallback until the user selects a model.
const [embeddingModel, setEmbeddingModel] = useState<DefaultModel>(
currentDataset?.embedding_model
? {
provider: currentDataset.embedding_model_provider,
model: currentDataset.embedding_model,
}
: {
provider: defaultEmbeddingModel?.provider.provider || '',
model: defaultEmbeddingModel?.model || '',
},
)
const getCreationParams = () => {
let params
if (segmentationType === SegmentType.CUSTOM && overlap > max) {
@@ -324,6 +335,8 @@ const StepTwo = ({
process_rule: getProcessRule(),
// eslint-disable-next-line @typescript-eslint/no-use-before-define
retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page.
embedding_model: embeddingModel.model, // Readonly
embedding_model_provider: embeddingModel.provider, // Readonly
} as CreateDocumentReq
}
else { // create
@@ -360,6 +373,8 @@ const StepTwo = ({
doc_language: docLanguage,
retrieval_model: postRetrievalConfig,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
} as CreateDocumentReq
if (dataSourceType === DataSourceType.FILE) {
params.data_source.info_list.file_info_list = {
@@ -613,14 +628,17 @@ const StepTwo = ({
<div className={s.formRow}>
<div className='w-full'>
<div className={s.label}>{t('datasetCreation.stepTwo.maxLength')}</div>
<input
type="number"
className={s.input}
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
value={max}
min={1}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<div className='relative w-full'>
<input
type="number"
className={s.input}
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
value={max}
min={1}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<div className='absolute top-2.5 right-2.5 text-text-tertiary system-sm-regular'>Tokens</div>
</div>
</div>
</div>
<div className={s.formRow}>
@@ -635,14 +653,17 @@ const StepTwo = ({
}
/>
</div>
<input
type="number"
className={s.input}
placeholder={t('datasetCreation.stepTwo.overlap') || ''}
value={overlap}
min={1}
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<div className='relative w-full'>
<input
type="number"
className={s.input}
placeholder={t('datasetCreation.stepTwo.overlap') || ''}
value={overlap}
min={1}
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<div className='absolute top-2.5 right-2.5 text-text-tertiary system-sm-regular'>Tokens</div>
</div>
</div>
</div>
<div className={s.formRow}>
@@ -675,7 +696,7 @@ const StepTwo = ({
!isAPIKeySet && s.disabled,
!hasSetIndexType && indexType === IndexingType.QUALIFIED && s.active,
hasSetIndexType && s.disabled,
hasSetIndexType && '!w-full',
hasSetIndexType && '!w-full !min-h-[96px]',
)}
onClick={() => {
if (isAPIKeySet)
@@ -690,16 +711,6 @@ const StepTwo = ({
{!hasSetIndexType && <span className={s.recommendTag}>{t('datasetCreation.stepTwo.recommend')}</span>}
</div>
<div className={s.tip}>{t('datasetCreation.stepTwo.qualifiedTip')}</div>
<div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
{
estimateTokes
? (
<div className='text-xs font-medium text-gray-800'>{formatNumber(estimateTokes.tokens)} tokens(<span className='text-yellow-500'>${formatNumber(estimateTokes.total_price)}</span>)</div>
)
: (
<div className={s.calculating}>{t('datasetCreation.stepTwo.calculating')}</div>
)
}
</div>
{!isAPIKeySet && (
<div className={s.warningTip}>
@@ -717,7 +728,7 @@ const StepTwo = ({
s.indexItem,
!hasSetIndexType && indexType === IndexingType.ECONOMICAL && s.active,
hasSetIndexType && s.disabled,
hasSetIndexType && '!w-full',
hasSetIndexType && '!w-full !min-h-[96px]',
)}
onClick={changeToEconomicalType}
>
@@ -726,13 +737,11 @@ const StepTwo = ({
<div className={s.typeHeader}>
<div className={s.title}>{t('datasetCreation.stepTwo.economical')}</div>
<div className={s.tip}>{t('datasetCreation.stepTwo.economicalTip')}</div>
<div className='pb-0.5 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.emstimateCost')}</div>
<div className='text-xs font-medium text-gray-800'>0 tokens</div>
</div>
</div>
)}
</div>
{hasSetIndexType && (
{hasSetIndexType && indexType === IndexingType.ECONOMICAL && (
<div className='mt-2 text-xs text-gray-500 font-medium'>
{t('datasetCreation.stepTwo.indexSettedTip')}
<Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
@@ -767,12 +776,32 @@ const StepTwo = ({
)}
</div>
)}
{/* Embedding model */}
{indexType === IndexingType.QUALIFIED && (
<div className='mb-2'>
<div className={cn(s.label, datasetId && 'flex justify-between items-center')}>{t('datasetSettings.form.embeddingModel')}</div>
<ModelSelector
readonly={!!datasetId}
defaultModel={embeddingModel}
modelList={embeddingModelList}
onSelect={(model: DefaultModel) => {
setEmbeddingModel(model)
}}
/>
{!!datasetId && (
<div className='mt-2 text-xs text-gray-500 font-medium'>
{t('datasetCreation.stepTwo.indexSettedTip')}
<Link className='text-[#155EEF]' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
</div>
)}
</div>
)}
{/* Retrieval Method Config */}
<div>
{!datasetId
? (
<div className={s.label}>
{t('datasetSettings.form.retrievalSetting.title')}
<div className='shrink-0 mr-4'>{t('datasetSettings.form.retrievalSetting.title')}</div>
<div className='leading-[18px] text-xs font-normal text-gray-500'>
<a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-6-retrieval-settings' className='text-[#155eef]'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
{t('datasetSettings.form.retrievalSetting.longDescription')}