feat(website-crawl): add jina reader as additional alternative for website crawling (#8761)

This commit is contained in:
Zhaofeng Miao
2024-09-30 09:57:19 +08:00
committed by GitHub
parent fb49413a41
commit 369e1e6f58
38 changed files with 927 additions and 75 deletions

View File

@@ -9,7 +9,7 @@ import {
import { Lock01 } from '@/app/components/base/icons/src/vender/solid/security'
import Button from '@/app/components/base/button'
import type { FirecrawlConfig } from '@/models/common'
import Field from '@/app/components/datasets/create/website/firecrawl/base/field'
import Field from '@/app/components/datasets/create/website/base/field'
import Toast from '@/app/components/base/toast'
import { createDataSourceApiKeyBinding } from '@/service/datasets'
import { LinkExternal02 } from '@/app/components/base/icons/src/vender/line/general'

View File

@@ -0,0 +1,140 @@
'use client'
import type { FC } from 'react'
import React, { useCallback, useState } from 'react'
import { useTranslation } from 'react-i18next'
import {
PortalToFollowElem,
PortalToFollowElemContent,
} from '@/app/components/base/portal-to-follow-elem'
import { Lock01 } from '@/app/components/base/icons/src/vender/solid/security'
import Button from '@/app/components/base/button'
import { DataSourceProvider } from '@/models/common'
import Field from '@/app/components/datasets/create/website/base/field'
import Toast from '@/app/components/base/toast'
import { createDataSourceApiKeyBinding } from '@/service/datasets'
import { LinkExternal02 } from '@/app/components/base/icons/src/vender/line/general'
/** Callbacks the Jina Reader configuration modal reports back to its parent. */
type Props = {
/** Invoked when the user clicks the Cancel button. */
onCancel: () => void
/** Invoked after the API key binding request has completed without throwing. */
onSaved: () => void
}
// Translation-key prefix used for this modal's title, placeholder and link text.
const I18N_PREFIX = 'datasetCreation.jinaReader'
/**
 * Modal dialog that collects a Jina Reader API key and submits it through
 * `createDataSourceApiKeyBinding` as a `website` data source credential.
 *
 * @param onCancel - Called when the user clicks the Cancel button.
 * @param onSaved - Called once the binding request resolves successfully.
 *   It is not called when validation fails or when the request throws.
 */
const ConfigJinaReaderModal: FC<Props> = ({
  onCancel,
  onSaved,
}) => {
  const { t } = useTranslation()
  const [isSaving, setIsSaving] = useState(false)
  const [apiKey, setApiKey] = useState('')

  const handleSave = useCallback(async () => {
    // Ignore repeated clicks while a save request is still in flight.
    if (isSaving)
      return

    // The API key is the only field and it is required.
    if (!apiKey) {
      Toast.notify({
        type: 'error',
        message: t('common.errorMsg.fieldRequired', {
          field: 'API Key',
        }),
      })
      return
    }

    const postData = {
      category: 'website',
      provider: DataSourceProvider.jinaReader,
      credentials: {
        auth_type: 'bearer',
        config: {
          api_key: apiKey,
        },
      },
    }

    try {
      setIsSaving(true)
      await createDataSourceApiKeyBinding(postData)
      Toast.notify({
        type: 'success',
        message: t('common.api.success'),
      })
    }
    finally {
      // Always release the loading state, even when the request throws.
      setIsSaving(false)
    }

    // Only reached on success: a thrown error propagates past this line.
    onSaved()
  }, [apiKey, onSaved, t, isSaving])

  return (
    <PortalToFollowElem open>
      <PortalToFollowElemContent className='w-full h-full z-[60]'>
        <div className='fixed inset-0 flex items-center justify-center bg-black/[.25]'>
          <div className='mx-2 w-[640px] max-h-[calc(100vh-120px)] bg-white shadow-xl rounded-2xl overflow-y-auto'>
            <div className='px-8 pt-8'>
              <div className='flex justify-between items-center mb-4'>
                <div className='text-xl font-semibold text-gray-900'>{t(`${I18N_PREFIX}.configJinaReader`)}</div>
              </div>
              <div className='space-y-4'>
                <Field
                  label='API Key'
                  labelClassName='!text-sm'
                  isRequired
                  value={apiKey}
                  onChange={(value: string | number) => setApiKey(value as string)}
                  placeholder={t(`${I18N_PREFIX}.apiKeyPlaceholder`)!}
                />
              </div>
              <div className='my-8 flex justify-between items-center h-8'>
                {/* External link opened in a new tab: rel prevents the target page from accessing window.opener. */}
                <a
                  className='flex items-center space-x-1 leading-[18px] text-xs font-normal text-[#155EEF]'
                  target='_blank' rel='noopener noreferrer'
                  href='https://jina.ai/reader/'
                >
                  <span>{t(`${I18N_PREFIX}.getApiKeyLinkText`)}</span>
                  <LinkExternal02 className='w-3 h-3' />
                </a>
                <div className='flex'>
                  <Button
                    size='large'
                    className='mr-2'
                    onClick={onCancel}
                  >
                    {t('common.operation.cancel')}
                  </Button>
                  <Button
                    variant='primary'
                    size='large'
                    onClick={handleSave}
                    loading={isSaving}
                  >
                    {t('common.operation.save')}
                  </Button>
                </div>
              </div>
            </div>
            {/* Footer note about key encryption, linking to the PKCS1_OAEP documentation. */}
            <div className='border-t-[0.5px] border-t-black/5'>
              <div className='flex justify-center items-center py-3 bg-gray-50 text-xs text-gray-500'>
                <Lock01 className='mr-1 w-3 h-3 text-gray-500' />
                {t('common.modelProvider.encrypted.front')}
                <a
                  className='text-primary-600 mx-1'
                  target='_blank' rel='noopener noreferrer'
                  href='https://pycryptodome.readthedocs.io/en/latest/src/cipher/oaep.html'
                >
                  PKCS1_OAEP
                </a>
                {t('common.modelProvider.encrypted.back')}
              </div>
            </div>
          </div>
        </div>
      </PortalToFollowElemContent>
    </PortalToFollowElem>
  )
}
// Export the component wrapped in React.memo.
export default React.memo(ConfigJinaReaderModal)

View File

@@ -2,11 +2,12 @@
import type { FC } from 'react'
import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import Panel from '../panel'
import { DataSourceType } from '../panel/types'
import ConfigFirecrawlModal from './config-firecrawl-modal'
import ConfigJinaReaderModal from './config-jina-reader-modal'
import cn from '@/utils/classnames'
import s from '@/app/components/datasets/create/website/index.module.css'
import { fetchDataSources, removeDataSourceApiKeyBinding } from '@/service/datasets'
import type {
@@ -19,9 +20,11 @@ import {
} from '@/models/common'
import Toast from '@/app/components/base/toast'
type Props = {}
type Props = {
provider: DataSourceProvider
}
const DataSourceWebsite: FC<Props> = () => {
const DataSourceWebsite: FC<Props> = ({ provider }) => {
const { t } = useTranslation()
const { isCurrentWorkspaceManager } = useAppContext()
const [sources, setSources] = useState<DataSourceItem[]>([])
@@ -36,22 +39,26 @@ const DataSourceWebsite: FC<Props> = () => {
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
const [isShowConfig, {
setTrue: showConfig,
setFalse: hideConfig,
}] = useBoolean(false)
const [configTarget, setConfigTarget] = useState<DataSourceProvider | null>(null)
const showConfig = useCallback((provider: DataSourceProvider) => {
setConfigTarget(provider)
}, [setConfigTarget])
const hideConfig = useCallback(() => {
setConfigTarget(null)
}, [setConfigTarget])
const handleAdded = useCallback(() => {
checkSetApiKey()
hideConfig()
}, [checkSetApiKey, hideConfig])
const getIdByProvider = (provider: string): string | undefined => {
const getIdByProvider = (provider: DataSourceProvider): string | undefined => {
const source = sources.find(item => item.provider === provider)
return source?.id
}
const handleRemove = useCallback((provider: string) => {
const handleRemove = useCallback((provider: DataSourceProvider) => {
return async () => {
const dataSourceId = getIdByProvider(provider)
if (dataSourceId) {
@@ -69,22 +76,34 @@ const DataSourceWebsite: FC<Props> = () => {
<>
<Panel
type={DataSourceType.website}
isConfigured={sources.length > 0}
onConfigure={showConfig}
provider={provider}
isConfigured={sources.find(item => item.provider === provider) !== undefined}
onConfigure={() => showConfig(provider)}
readOnly={!isCurrentWorkspaceManager}
configuredList={sources.map(item => ({
configuredList={sources.filter(item => item.provider === provider).map(item => ({
id: item.id,
logo: ({ className }: { className: string }) => (
<div className={cn(className, 'flex items-center justify-center w-5 h-5 bg-white border border-gray-100 text-xs font-medium text-gray-500 rounded ml-3')}>🔥</div>
item.provider === DataSourceProvider.fireCrawl
? (
<div className={cn(className, 'flex items-center justify-center w-5 h-5 bg-white border border-gray-100 text-xs font-medium text-gray-500 rounded ml-3')}>🔥</div>
)
: (
<div className={cn(className, 'flex items-center justify-center w-5 h-5 bg-white border border-gray-100 text-xs font-medium text-gray-500 rounded ml-3')}>
<span className={s.jinaLogo} />
</div>
)
),
name: 'Firecrawl',
name: item.provider === DataSourceProvider.fireCrawl ? 'Firecrawl' : 'Jina Reader',
isActive: true,
}))}
onRemove={handleRemove(DataSourceProvider.fireCrawl)}
onRemove={handleRemove(provider)}
/>
{isShowConfig && (
{configTarget === DataSourceProvider.fireCrawl && (
<ConfigFirecrawlModal onSaved={handleAdded} onCancel={hideConfig} />
)}
{configTarget === DataSourceProvider.jinaReader && (
<ConfigJinaReaderModal onSaved={handleAdded} onCancel={hideConfig} />
)}
</>
)