feat(website-crawl): add jina reader as additional alternative for website crawling (#8761)
This commit is contained in:
@@ -11,7 +11,7 @@ import { DataSourceType } from '@/models/datasets'
|
||||
import type { CrawlOptions, CrawlResultItem, DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
|
||||
import { fetchDataSource } from '@/service/common'
|
||||
import { fetchDatasetDetail } from '@/service/datasets'
|
||||
import type { NotionPage } from '@/models/common'
|
||||
import { DataSourceProvider, type NotionPage } from '@/models/common'
|
||||
import { useModalContext } from '@/context/modal-context'
|
||||
import { useDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks'
|
||||
|
||||
@@ -26,6 +26,7 @@ const DEFAULT_CRAWL_OPTIONS: CrawlOptions = {
|
||||
excludes: '',
|
||||
limit: 10,
|
||||
max_depth: '',
|
||||
use_sitemap: true,
|
||||
}
|
||||
|
||||
const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
||||
@@ -51,7 +52,8 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
||||
const updateFileList = (preparedFiles: FileItem[]) => {
|
||||
setFiles(preparedFiles)
|
||||
}
|
||||
const [fireCrawlJobId, setFireCrawlJobId] = useState('')
|
||||
const [websiteCrawlProvider, setWebsiteCrawlProvider] = useState<DataSourceProvider>(DataSourceProvider.fireCrawl)
|
||||
const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('')
|
||||
|
||||
const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
|
||||
const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID)
|
||||
@@ -137,7 +139,8 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
||||
onStepChange={nextStep}
|
||||
websitePages={websitePages}
|
||||
updateWebsitePages={setWebsitePages}
|
||||
onFireCrawlJobIdChange={setFireCrawlJobId}
|
||||
onWebsiteCrawlProviderChange={setWebsiteCrawlProvider}
|
||||
onWebsiteCrawlJobIdChange={setWebsiteCrawlJobId}
|
||||
crawlOptions={crawlOptions}
|
||||
onCrawlOptionsChange={setCrawlOptions}
|
||||
/>
|
||||
@@ -151,7 +154,8 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
||||
files={fileList.map(file => file.file)}
|
||||
notionPages={notionPages}
|
||||
websitePages={websitePages}
|
||||
fireCrawlJobId={fireCrawlJobId}
|
||||
websiteCrawlProvider={websiteCrawlProvider}
|
||||
websiteCrawlJobId={websiteCrawlJobId}
|
||||
onStepChange={changeStep}
|
||||
updateIndexingTypeCache={updateIndexingTypeCache}
|
||||
updateResultCache={updateResultCache}
|
||||
|
Reference in New Issue
Block a user