Feat/firecrawl data source (#5232)

Co-authored-by: Nicolas <nicolascamara29@gmail.com>
Co-authored-by: chenhe <guchenhe@gmail.com>
Co-authored-by: takatost <takatost@gmail.com>
This commit is contained in:
Jyong
2024-06-15 02:46:02 +08:00
committed by GitHub
parent 918ebe1620
commit ba5f8afaa8
36 changed files with 1174 additions and 64 deletions

View File

@@ -11,7 +11,7 @@ from core.rag.extractor.notion_extractor import NotionExtractor
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from extensions.ext_database import db
from models.dataset import Dataset, Document, DocumentSegment
from models.source import DataSourceBinding
from models.source import DataSourceOauthBinding
@shared_task(queue='dataset')
@@ -43,12 +43,12 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
page_id = data_source_info['notion_page_id']
page_type = data_source_info['type']
page_edited_time = data_source_info['last_edited_time']
data_source_binding = DataSourceBinding.query.filter(
data_source_binding = DataSourceOauthBinding.query.filter(
db.and_(
DataSourceBinding.tenant_id == document.tenant_id,
DataSourceBinding.provider == 'notion',
DataSourceBinding.disabled == False,
DataSourceBinding.source_info['workspace_id'] == f'"{workspace_id}"'
DataSourceOauthBinding.tenant_id == document.tenant_id,
DataSourceOauthBinding.provider == 'notion',
DataSourceOauthBinding.disabled == False,
DataSourceOauthBinding.source_info['workspace_id'] == f'"{workspace_id}"'
)
).first()
if not data_source_binding: