feat: support firecrawl frontend code (#5226)
This commit is contained in:
@@ -323,6 +323,7 @@
|
||||
}
|
||||
|
||||
/* Flex child that starts from zero width and grows into the remaining space of
   its flex row. Presumably used so long source names can be truncated instead
   of stretching the row — confirm at the usage site. */
.sourceContent {
  flex-grow: 1;
  flex-shrink: 1;
  flex-basis: auto;
  width: 0;
}
|
||||
|
||||
|
@@ -12,7 +12,7 @@ import RetrievalMethodInfo from '../../common/retrieval-method-info'
|
||||
import PreviewItem, { PreviewType } from './preview-item'
|
||||
import LanguageSelect from './language-select'
|
||||
import s from './index.module.css'
|
||||
import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
|
||||
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
|
||||
import {
|
||||
createDocument,
|
||||
createFirstDocument,
|
||||
@@ -44,6 +44,7 @@ import TooltipPlus from '@/app/components/base/tooltip-plus'
|
||||
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
|
||||
import { LanguagesSupported } from '@/i18n/language'
|
||||
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
|
||||
import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel'
|
||||
|
||||
type ValueOf<T> = T[keyof T]
|
||||
type StepTwoProps = {
|
||||
@@ -56,6 +57,9 @@ type StepTwoProps = {
|
||||
dataSourceType: DataSourceType
|
||||
files: CustomFile[]
|
||||
notionPages?: NotionPage[]
|
||||
websitePages?: CrawlResultItem[]
|
||||
crawlOptions?: CrawlOptions
|
||||
fireCrawlJobId?: string
|
||||
onStepChange?: (delta: number) => void
|
||||
updateIndexingTypeCache?: (type: string) => void
|
||||
updateResultCache?: (res: createDocumentResponse) => void
|
||||
@@ -79,9 +83,12 @@ const StepTwo = ({
|
||||
onSetting,
|
||||
datasetId,
|
||||
indexingType,
|
||||
dataSourceType,
|
||||
dataSourceType: inCreatePageDataSourceType,
|
||||
files,
|
||||
notionPages = [],
|
||||
websitePages = [],
|
||||
crawlOptions,
|
||||
fireCrawlJobId = '',
|
||||
onStepChange,
|
||||
updateIndexingTypeCache,
|
||||
updateResultCache,
|
||||
@@ -94,6 +101,8 @@ const StepTwo = ({
|
||||
const isMobile = media === MediaType.mobile
|
||||
|
||||
const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext()
|
||||
const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
|
||||
const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
|
||||
const scrollRef = useRef<HTMLDivElement>(null)
|
||||
const [scrolled, setScrolled] = useState(false)
|
||||
const previewScrollRef = useRef<HTMLDivElement>(null)
|
||||
@@ -242,6 +251,15 @@ const StepTwo = ({
|
||||
}) as NotionInfo[]
|
||||
}
|
||||
|
||||
/**
 * Builds the website descriptor that is sent as `website_info_list` when the
 * data source is a website.
 *
 * Reads the component's `websitePages`, `crawlOptions` and `fireCrawlJobId`
 * props; the provider is always reported as 'firecrawl'.
 *
 * @returns An object holding the provider name, the crawl job id, the source
 * URL of every selected page, and the `only_main_content` crawl option
 * (undefined when no crawl options were supplied).
 */
const getWebsiteInfo = () => {
  // Only the source URL of each crawled page is forwarded.
  const urls = websitePages.map(({ source_url }) => source_url)
  const onlyMainContent = crawlOptions?.only_main_content

  return {
    provider: 'firecrawl',
    job_id: fireCrawlJobId,
    urls,
    only_main_content: onlyMainContent,
  }
}
|
||||
|
||||
const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => {
|
||||
if (dataSourceType === DataSourceType.FILE) {
|
||||
return {
|
||||
@@ -271,6 +289,19 @@ const StepTwo = ({
|
||||
dataset_id: datasetId as string,
|
||||
}
|
||||
}
|
||||
if (dataSourceType === DataSourceType.WEB) {
|
||||
return {
|
||||
info_list: {
|
||||
data_source_type: dataSourceType,
|
||||
website_info_list: getWebsiteInfo(),
|
||||
},
|
||||
indexing_technique: getIndexing_technique() as string,
|
||||
process_rule: getProcessRule(),
|
||||
doc_form: docForm,
|
||||
doc_language: docLanguage,
|
||||
dataset_id: datasetId as string,
|
||||
}
|
||||
}
|
||||
}
|
||||
const {
|
||||
modelList: rerankModelList,
|
||||
@@ -335,6 +366,9 @@ const StepTwo = ({
|
||||
}
|
||||
if (dataSourceType === DataSourceType.NOTION)
|
||||
params.data_source.info_list.notion_info_list = getNotionInfo()
|
||||
|
||||
if (dataSourceType === DataSourceType.WEB)
|
||||
params.data_source.info_list.website_info_list = getWebsiteInfo()
|
||||
}
|
||||
return params
|
||||
}
|
||||
@@ -819,6 +853,22 @@ const StepTwo = ({
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
{/* Website data source summary: shows the first crawled page URL and, when more
    than one page was selected, a count of the remaining pages. */}
{dataSourceType === DataSourceType.WEB && (
  <>
    <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.websiteSource')}</div>
    <div className='flex items-center text-sm leading-6 font-medium text-gray-800'>
      <Globe01 className='shrink-0 mr-1' />
      {/* websitePages defaults to an empty array, so guard the first-element
          access to avoid a render-time TypeError when no page is available. */}
      <span className='grow w-0 truncate'>{websitePages[0]?.source_url}</span>
      {websitePages.length > 1 && (
        <span className={s.sourceCount}>
          <span>{t('datasetCreation.stepTwo.other')}</span>
          <span>{websitePages.length - 1}</span>
          <span>{t('datasetCreation.stepTwo.webpageUnit')}</span>
        </span>
      )}
    </div>
  </>
)}
|
||||
</div>
|
||||
<div className={s.divider} />
|
||||
<div className={s.segmentCount}>
|
||||
|
Reference in New Issue
Block a user