feat(website-crawl): add jina reader as additional alternative for website crawling (#8761)
This commit is contained in:
@@ -33,6 +33,7 @@ import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/componen
|
||||
import Toast from '@/app/components/base/toast'
|
||||
import { formatNumber } from '@/utils/format'
|
||||
import type { NotionPage } from '@/models/common'
|
||||
+ import { DataSourceProvider } from '@/models/common'
|
||||
import { DataSourceType, DocForm } from '@/models/datasets'
|
||||
import NotionIcon from '@/app/components/base/notion-icon'
|
||||
import Switch from '@/app/components/base/switch'
|
||||
@@ -63,7 +64,8 @@ type StepTwoProps = {
|
||||
notionPages?: NotionPage[]
|
||||
websitePages?: CrawlResultItem[]
|
||||
crawlOptions?: CrawlOptions
|
||||
- fireCrawlJobId?: string
|
||||
+ websiteCrawlProvider?: DataSourceProvider
|
||||
+ websiteCrawlJobId?: string
|
||||
onStepChange?: (delta: number) => void
|
||||
updateIndexingTypeCache?: (type: string) => void
|
||||
updateResultCache?: (res: createDocumentResponse) => void
|
||||
@@ -94,7 +96,8 @@ const StepTwo = ({
|
||||
notionPages = [],
|
||||
websitePages = [],
|
||||
crawlOptions,
|
||||
- fireCrawlJobId = '',
|
||||
+ websiteCrawlProvider = DataSourceProvider.fireCrawl,
|
||||
+ websiteCrawlJobId = '',
|
||||
onStepChange,
|
||||
updateIndexingTypeCache,
|
||||
updateResultCache,
|
||||
@@ -260,8 +263,8 @@ const StepTwo = ({
|
||||
|
||||
const getWebsiteInfo = () => {
|
||||
return {
|
||||
- provider: 'firecrawl',
|
||||
- job_id: fireCrawlJobId,
|
||||
+ provider: websiteCrawlProvider,
|
||||
+ job_id: websiteCrawlJobId,
|
||||
urls: websitePages.map(page => page.source_url),
|
||||
only_main_content: crawlOptions?.only_main_content,
|
||||
}
|
||||
|
Reference in New Issue
Block a user