Feat/dify rag (#2528)

Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
Jyong
2024-02-22 23:31:57 +08:00
committed by GitHub
parent 97fe817186
commit 6c4e6bf1d6
119 changed files with 3181 additions and 5892 deletions

View File

@@ -9,8 +9,8 @@ from flask_login import current_user
from werkzeug.datastructures import FileStorage
from werkzeug.exceptions import NotFound
from core.data_loader.file_extractor import FileExtractor
from core.file.upload_file_parser import UploadFileParser
from core.rag.extractor.extract_processor import ExtractProcessor
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.account import Account
@@ -32,7 +32,8 @@ class FileService:
def upload_file(file: FileStorage, user: Union[Account, EndUser], only_image: bool = False) -> UploadFile:
extension = file.filename.split('.')[-1]
etl_type = current_app.config['ETL_TYPE']
allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS if etl_type == 'Unstructured' else ALLOWED_EXTENSIONS
allowed_extensions = UNSTRUSTURED_ALLOWED_EXTENSIONS + IMAGE_EXTENSIONS if etl_type == 'Unstructured' \
else ALLOWED_EXTENSIONS + IMAGE_EXTENSIONS
if extension.lower() not in allowed_extensions:
raise UnsupportedFileTypeError()
elif only_image and extension.lower() not in IMAGE_EXTENSIONS:
@@ -136,7 +137,7 @@ class FileService:
if extension.lower() not in allowed_extensions:
raise UnsupportedFileTypeError()
text = FileExtractor.load(upload_file, return_text=True)
text = ExtractProcessor.load_from_upload_file(upload_file, return_text=True)
text = text[0:PREVIEW_WORDS_LIMIT] if text else ''
return text
@@ -164,7 +165,7 @@ class FileService:
return generator, upload_file.mime_type
@staticmethod
def get_public_image_preview(file_id: str) -> str:
def get_public_image_preview(file_id: str) -> tuple[Generator, str]:
upload_file = db.session.query(UploadFile) \
.filter(UploadFile.id == file_id) \
.first()