From 8362365eae5ade8f35a153debc9f2671167d3083 Mon Sep 17 00:00:00 2001 From: lyzno1 <92089059+lyzno1@users.noreply.github.com> Date: Fri, 8 Aug 2025 22:58:52 +0800 Subject: [PATCH] Fix file type misclassification in logs interface (#23641) --- .../base/file-uploader/utils.spec.ts | 234 +++++++++++++++++- .../components/base/file-uploader/utils.ts | 24 +- 2 files changed, 250 insertions(+), 8 deletions(-) diff --git a/web/app/components/base/file-uploader/utils.spec.ts b/web/app/components/base/file-uploader/utils.spec.ts index 4a3408ef0..774c38eb5 100644 --- a/web/app/components/base/file-uploader/utils.spec.ts +++ b/web/app/components/base/file-uploader/utils.spec.ts @@ -36,7 +36,7 @@ describe('file-uploader utils', () => { }) describe('fileUpload', () => { - it('should handle successful file upload', async () => { + it('should handle successful file upload', () => { const mockFile = new File(['test'], 'test.txt') const mockCallbacks = { onProgressCallback: jest.fn(), @@ -46,13 +46,12 @@ describe('file-uploader utils', () => { jest.mocked(upload).mockResolvedValue({ id: '123' }) - await fileUpload({ + fileUpload({ file: mockFile, ...mockCallbacks, }) expect(upload).toHaveBeenCalled() - expect(mockCallbacks.onSuccessCallback).toHaveBeenCalledWith({ id: '123' }) }) }) @@ -284,7 +283,23 @@ describe('file-uploader utils', () => { }) describe('getProcessedFilesFromResponse', () => { - it('should process files correctly', () => { + beforeEach(() => { + jest.mocked(mime.getAllExtensions).mockImplementation((mimeType: string) => { + const mimeMap: Record> = { + 'image/jpeg': new Set(['jpg', 'jpeg']), + 'image/png': new Set(['png']), + 'image/gif': new Set(['gif']), + 'video/mp4': new Set(['mp4']), + 'audio/mp3': new Set(['mp3']), + 'application/pdf': new Set(['pdf']), + 'text/plain': new Set(['txt']), + 'application/json': new Set(['json']), + } + return mimeMap[mimeType] || new Set() + }) + }) + + it('should process files correctly without type correction', () => { const files = [{ related_id: '2a38e2ca-1295-415d-a51d-65d4ff9912d9', extension: '.jpeg', @@ -294,6 +309,8 @@ describe('file-uploader utils', () => { transfer_method: TransferMethod.local_file, type: 'image', url: 'https://upload.dify.dev/files/xxx/file-preview', + upload_file_id: '2a38e2ca-1295-415d-a51d-65d4ff9912d9', + remote_url: '', }] const result = getProcessedFilesFromResponse(files) @@ -309,6 +326,215 @@ describe('file-uploader utils', () => { url: 'https://upload.dify.dev/files/xxx/file-preview', }) }) + + it('should correct image file misclassified as document', () => { + const files = [{ + related_id: '123', + extension: '.jpg', + filename: 'image.jpg', + size: 1024, + mime_type: 'image/jpeg', + transfer_method: TransferMethod.local_file, + type: 'document', + url: 'https://example.com/image.jpg', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('image') + }) + + it('should correct video file misclassified as document', () => { + const files = [{ + related_id: '123', + extension: '.mp4', + filename: 'video.mp4', + size: 1024, + mime_type: 'video/mp4', + transfer_method: TransferMethod.local_file, + type: 'document', + url: 'https://example.com/video.mp4', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('video') + }) + + it('should correct audio file misclassified as document', () => { + const files = [{ + related_id: '123', + extension: '.mp3', + filename: 'audio.mp3', + size: 1024, + mime_type: 'audio/mp3', + transfer_method: TransferMethod.local_file, + type: 'document', + url: 'https://example.com/audio.mp3', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('audio') + }) + + it('should correct document file misclassified as image', () => { + const files = [{ + related_id: '123', + extension: '.pdf', + filename: 'document.pdf', + size: 1024, + mime_type: 'application/pdf', + transfer_method: TransferMethod.local_file, + type: 'image', + url: 'https://example.com/document.pdf', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('document') + }) + + it('should NOT correct when filename and MIME type conflict', () => { + const files = [{ + related_id: '123', + extension: '.pdf', + filename: 'document.pdf', + size: 1024, + mime_type: 'image/jpeg', + transfer_method: TransferMethod.local_file, + type: 'document', + url: 'https://example.com/document.pdf', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('document') + }) + + it('should NOT correct when filename and MIME type both point to wrong type', () => { + const files = [{ + related_id: '123', + extension: '.jpg', + filename: 'image.jpg', + size: 1024, + mime_type: 'image/jpeg', + transfer_method: TransferMethod.local_file, + type: 'image', + url: 'https://example.com/image.jpg', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('image') + }) + + it('should handle files with missing filename', () => { + const files = [{ + related_id: '123', + extension: '', + filename: '', + size: 1024, + mime_type: 'image/jpeg', + transfer_method: TransferMethod.local_file, + type: 'document', + url: 'https://example.com/file', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('document') + }) + + it('should handle files with missing MIME type', () => { + const files = [{ + related_id: '123', + extension: '.jpg', + filename: 'image.jpg', + size: 1024, + mime_type: '', + transfer_method: TransferMethod.local_file, + type: 'document', + url: 'https://example.com/image.jpg', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('document') + }) + + it('should handle files with unknown extensions', () => { + const files = [{ + related_id: '123', + extension: '.unknown', + filename: 'file.unknown', + size: 1024, + mime_type: 'application/unknown', + transfer_method: TransferMethod.local_file, + type: 'document', + url: 'https://example.com/file.unknown', + upload_file_id: '123', + remote_url: '', + }] + + const result = getProcessedFilesFromResponse(files) + expect(result[0].supportFileType).toBe('document') + }) + + it('should handle multiple different file types correctly', () => { + const files = [ + { + related_id: '1', + extension: '.jpg', + filename: 'correct-image.jpg', + mime_type: 'image/jpeg', + type: 'image', + size: 1024, + transfer_method: TransferMethod.local_file, + url: 'https://example.com/correct-image.jpg', + upload_file_id: '1', + remote_url: '', + }, + { + related_id: '2', + extension: '.png', + filename: 'misclassified-image.png', + mime_type: 'image/png', + type: 'document', + size: 2048, + transfer_method: TransferMethod.local_file, + url: 'https://example.com/misclassified-image.png', + upload_file_id: '2', + remote_url: '', + }, + { + related_id: '3', + extension: '.pdf', + filename: 'conflicted.pdf', + mime_type: 'image/jpeg', + type: 'document', + size: 3072, + transfer_method: TransferMethod.local_file, + url: 'https://example.com/conflicted.pdf', + upload_file_id: '3', + remote_url: '', + }, + ] + + const result = getProcessedFilesFromResponse(files) + + expect(result[0].supportFileType).toBe('image') // correct, no change + expect(result[1].supportFileType).toBe('image') // corrected from document to image + expect(result[2].supportFileType).toBe('document') // conflict, no change + }) }) describe('getFileNameFromUrl', () => { diff --git a/web/app/components/base/file-uploader/utils.ts b/web/app/components/base/file-uploader/utils.ts index e870f9eda..9c217646c 100644 --- a/web/app/components/base/file-uploader/utils.ts +++ b/web/app/components/base/file-uploader/utils.ts @@ -70,10 +70,13 @@ export const getFileExtension = (fileName: string, fileMimetype: string, isRemot } } if (!extension) { - if (extensions.size > 0) - extension = extensions.values().next().value.toLowerCase() - else + if (extensions.size > 0) { + const firstExtension = extensions.values().next().value + extension = firstExtension ? firstExtension.toLowerCase() : '' + } + else { extension = extensionInFileName + } } if (isRemote) @@ -145,6 +148,19 @@ export const getProcessedFiles = (files: FileEntity[]) => { export const getProcessedFilesFromResponse = (files: FileResponse[]) => { return files.map((fileItem) => { + let supportFileType = fileItem.type + + if (fileItem.filename && fileItem.mime_type) { + const detectedTypeFromFileName = getSupportFileType(fileItem.filename, '') + const detectedTypeFromMime = getSupportFileType('', fileItem.mime_type) + + if (detectedTypeFromFileName + && detectedTypeFromMime + && detectedTypeFromFileName === detectedTypeFromMime + && detectedTypeFromFileName !== fileItem.type) + supportFileType = detectedTypeFromFileName + } + return { id: fileItem.related_id, name: fileItem.filename, @@ -152,7 +168,7 @@ export const getProcessedFilesFromResponse = (files: FileResponse[]) => { type: fileItem.mime_type, progress: 100, transferMethod: fileItem.transfer_method, - supportFileType: fileItem.type, + supportFileType, uploadedId: fileItem.upload_file_id || fileItem.related_id, url: fileItem.url || fileItem.remote_url, }