feat: support firecrawl frontend code (#5226)

This commit is contained in:
Joel
2024-06-14 22:02:41 +08:00
committed by GitHub
parent 8d1386df0f
commit 28554350de
51 changed files with 1979 additions and 145 deletions

View File

@@ -323,6 +323,7 @@
}
/* Flex item that starts from a zero base width and may grow or shrink to take the
   remaining space in its flex row. Presumably this lets long source names be
   truncated instead of overflowing; confirm against the markup that uses it. */
.sourceContent {
width: 0;
flex: 1 1 auto;
}

View File

@@ -12,7 +12,7 @@ import RetrievalMethodInfo from '../../common/retrieval-method-info'
import PreviewItem, { PreviewType } from './preview-item'
import LanguageSelect from './language-select'
import s from './index.module.css'
import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
createDocument,
createFirstDocument,
@@ -44,6 +44,7 @@ import TooltipPlus from '@/app/components/base/tooltip-plus'
import { useModelListAndDefaultModelAndCurrentProviderAndModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
import { LanguagesSupported } from '@/i18n/language'
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel'
type ValueOf<T> = T[keyof T]
type StepTwoProps = {
@@ -56,6 +57,9 @@ type StepTwoProps = {
dataSourceType: DataSourceType
files: CustomFile[]
notionPages?: NotionPage[]
websitePages?: CrawlResultItem[]
crawlOptions?: CrawlOptions
fireCrawlJobId?: string
onStepChange?: (delta: number) => void
updateIndexingTypeCache?: (type: string) => void
updateResultCache?: (res: createDocumentResponse) => void
@@ -79,9 +83,12 @@ const StepTwo = ({
onSetting,
datasetId,
indexingType,
dataSourceType,
dataSourceType: inCreatePageDataSourceType,
files,
notionPages = [],
websitePages = [],
crawlOptions,
fireCrawlJobId = '',
onStepChange,
updateIndexingTypeCache,
updateResultCache,
@@ -94,6 +101,8 @@ const StepTwo = ({
const isMobile = media === MediaType.mobile
const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext()
const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
const scrollRef = useRef<HTMLDivElement>(null)
const [scrolled, setScrolled] = useState(false)
const previewScrollRef = useRef<HTMLDivElement>(null)
@@ -242,6 +251,15 @@ const StepTwo = ({
}) as NotionInfo[]
}
/**
 * Builds the `website_info_list` payload for the website data source.
 *
 * Combines the crawl job id, the source URL of every selected crawled page and
 * the "only main content" crawl option into the object attached to the
 * indexing-estimate and document-creation request params.
 */
const getWebsiteInfo = () => {
  const urls = websitePages.map(({ source_url }) => source_url)
  // crawlOptions is optional, so this is undefined when no options were supplied.
  const onlyMainContent = crawlOptions?.only_main_content

  return {
    provider: 'firecrawl',
    job_id: fireCrawlJobId,
    urls,
    only_main_content: onlyMainContent,
  }
}
const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => {
if (dataSourceType === DataSourceType.FILE) {
return {
@@ -271,6 +289,19 @@ const StepTwo = ({
dataset_id: datasetId as string,
}
}
if (dataSourceType === DataSourceType.WEB) {
return {
info_list: {
data_source_type: dataSourceType,
website_info_list: getWebsiteInfo(),
},
indexing_technique: getIndexing_technique() as string,
process_rule: getProcessRule(),
doc_form: docForm,
doc_language: docLanguage,
dataset_id: datasetId as string,
}
}
}
const {
modelList: rerankModelList,
@@ -335,6 +366,9 @@ const StepTwo = ({
}
if (dataSourceType === DataSourceType.NOTION)
params.data_source.info_list.notion_info_list = getNotionInfo()
if (dataSourceType === DataSourceType.WEB)
params.data_source.info_list.website_info_list = getWebsiteInfo()
}
return params
}
@@ -819,6 +853,22 @@ const StepTwo = ({
</div>
</>
)}
{/* Website data source summary: shows the first crawled page URL and, when more
    than one page was selected, a count of the remaining pages. */}
{dataSourceType === DataSourceType.WEB && (
  <>
    <div className='mb-2 text-xs font-medium text-gray-500'>{t('datasetCreation.stepTwo.websiteSource')}</div>
    <div className='flex items-center text-sm leading-6 font-medium text-gray-800'>
      <Globe01 className='shrink-0 mr-1' />
      {/* websitePages defaults to [], so index 0 can be undefined; optional
          chaining renders an empty label instead of throwing during render. */}
      <span className='grow w-0 truncate'>{websitePages[0]?.source_url}</span>
      {websitePages.length > 1 && (
        <span className={s.sourceCount}>
          <span>{t('datasetCreation.stepTwo.other')}</span>
          <span>{websitePages.length - 1}</span>
          <span>{t('datasetCreation.stepTwo.webpageUnit')}</span>
        </span>
      )}
    </div>
  </>
)}
</div>
<div className={s.divider} />
<div className={s.segmentCount}>