fix remote file (#23127)

Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
This commit is contained in:
kenwoodjw
2025-07-31 10:30:54 +08:00
committed by GitHub
parent 1b2046da3f
commit 4251515b4e

View File

@@ -1,4 +1,6 @@
import mimetypes import mimetypes
import os
import urllib.parse
import uuid import uuid
from collections.abc import Callable, Mapping, Sequence from collections.abc import Callable, Mapping, Sequence
from typing import Any, cast from typing import Any, cast
@@ -240,16 +242,21 @@ def _build_from_remote_url(
def _get_remote_file_info(url: str):
    """Probe a remote URL and return ``(mime_type, filename, file_size)``.

    The filename and MIME type are first derived from the URL path, then
    refined from the headers of a HEAD request issued through ``ssrf_proxy``
    (redirects followed) when that request answers 200 OK.

    :param url: absolute URL of the remote file.
    :return: tuple ``(mime_type, filename, file_size)`` where ``mime_type`` is
        ``""`` when it cannot be determined, ``filename`` falls back to
        ``"unknown_file"`` when the URL path has no basename, and
        ``file_size`` is ``-1`` when no usable ``Content-Length`` is available.
    """
    file_size = -1

    # Use only the path component so query strings and fragments never end up
    # in the filename.
    url_path = urllib.parse.urlparse(url).path
    filename = os.path.basename(url_path) or "unknown_file"

    # Initial guess from the URL filename; response headers may refine it.
    mime_type = mimetypes.guess_type(filename)[0] or ""

    resp = ssrf_proxy.head(url, follow_redirects=True)
    resp = cast(httpx.Response, resp)
    if resp.status_code == httpx.codes.OK:
        content_disposition = resp.headers.get("Content-Disposition")
        # Only trust the header when it really carries a ``filename=``
        # parameter; otherwise a bare "inline"/"attachment" (or a header that
        # only has ``filename*=``) would itself be taken as the filename.
        if content_disposition and "filename=" in content_disposition:
            raw_value = content_disposition.split("filename=", 1)[1].strip()
            if raw_value.startswith('"'):
                # Quoted value: everything up to the closing quote.
                header_name = raw_value[1:].split('"', 1)[0]
            else:
                # Unquoted token: stop at the next parameter separator.
                header_name = raw_value.split(";", 1)[0].strip()
            # The header is server-controlled; drop any directory components.
            header_name = os.path.basename(header_name)
            if header_name:
                filename = header_name
                # Re-guess from the updated filename, keeping the URL-based
                # guess when the new name has no recognizable extension.
                mime_type = mimetypes.guess_type(filename)[0] or mime_type

        try:
            file_size = int(resp.headers.get("Content-Length", file_size))
        except ValueError:
            # Malformed Content-Length: keep the "unknown size" sentinel.
            file_size = -1

        if not mime_type:
            # Last resort: the server-declared type, without parameters such
            # as "; charset=utf-8".
            mime_type = str(resp.headers.get("Content-Type", "")).split(";", 1)[0].strip()

    return mime_type, filename, file_size