Feat/firecrawl data source (#5232)

Co-authored-by: Nicolas <nicolascamara29@gmail.com>
Co-authored-by: chenhe <guchenhe@gmail.com>
Co-authored-by: takatost <takatost@gmail.com>
This commit is contained in:
Jyong
2024-06-15 02:46:02 +08:00
committed by GitHub
parent 918ebe1620
commit ba5f8afaa8
36 changed files with 1174 additions and 64 deletions

View File

@@ -4,3 +4,4 @@ from enum import Enum
class DatasourceType(Enum):
    """Enumeration of data source kinds, each identified by a string value."""

    FILE = "upload_file"
    NOTION = "notion_import"
    WEBSITE = "website_crawl"

View File

@@ -1,3 +1,5 @@
from typing import Optional
from pydantic import BaseModel, ConfigDict
from models.dataset import Document
@@ -19,14 +21,33 @@ class NotionInfo(BaseModel):
super().__init__(**data)
class WebsiteInfo(BaseModel):
    """
    Website import info.

    All fields except ``only_main_content`` are required; pydantic validates
    them when the model is constructed.
    """

    # NOTE(review): presumably the name of the crawl provider — confirm
    # against callers.
    provider: str
    # NOTE(review): presumably the identifier of the provider-side crawl
    # job — confirm against callers.
    job_id: str
    url: str
    # NOTE(review): the accepted values of ``mode`` are not visible here —
    # confirm against callers.
    mode: str
    tenant_id: str
    only_main_content: bool = False

    # Pydantic v2 configuration style, matching ExtractSetting in this file.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __init__(self, **data) -> None:
        """Build the model from keyword arguments, delegating to BaseModel."""
        super().__init__(**data)
class ExtractSetting(BaseModel):
"""
Model class for provider response.
"""
datasource_type: str
upload_file: UploadFile = None
notion_info: NotionInfo = None
document_model: str = None
upload_file: Optional[UploadFile]
notion_info: Optional[NotionInfo]
website_info: Optional[WebsiteInfo]
document_model: Optional[str]
model_config = ConfigDict(arbitrary_types_allowed=True)
def __init__(self, **data) -> None: