fix: support custom file types in workflow Start node (#23672)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -168,7 +168,57 @@ def _extract_text_by_mime_type(*, file_content: bytes, mime_type: str) -> str:
|
||||
def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) -> str:
|
||||
"""Extract text from a file based on its file extension."""
|
||||
match file_extension:
|
||||
case ".txt" | ".markdown" | ".md" | ".html" | ".htm" | ".xml":
|
||||
case (
|
||||
".txt"
|
||||
| ".markdown"
|
||||
| ".md"
|
||||
| ".html"
|
||||
| ".htm"
|
||||
| ".xml"
|
||||
| ".c"
|
||||
| ".h"
|
||||
| ".cpp"
|
||||
| ".hpp"
|
||||
| ".cc"
|
||||
| ".cxx"
|
||||
| ".c++"
|
||||
| ".py"
|
||||
| ".js"
|
||||
| ".ts"
|
||||
| ".jsx"
|
||||
| ".tsx"
|
||||
| ".java"
|
||||
| ".php"
|
||||
| ".rb"
|
||||
| ".go"
|
||||
| ".rs"
|
||||
| ".swift"
|
||||
| ".kt"
|
||||
| ".scala"
|
||||
| ".sh"
|
||||
| ".bash"
|
||||
| ".bat"
|
||||
| ".ps1"
|
||||
| ".sql"
|
||||
| ".r"
|
||||
| ".m"
|
||||
| ".pl"
|
||||
| ".lua"
|
||||
| ".vim"
|
||||
| ".asm"
|
||||
| ".s"
|
||||
| ".css"
|
||||
| ".scss"
|
||||
| ".less"
|
||||
| ".sass"
|
||||
| ".ini"
|
||||
| ".cfg"
|
||||
| ".conf"
|
||||
| ".toml"
|
||||
| ".env"
|
||||
| ".log"
|
||||
| ".vtt"
|
||||
):
|
||||
return _extract_text_from_plain_text(file_content)
|
||||
case ".json":
|
||||
return _extract_text_from_json(file_content)
|
||||
@@ -194,8 +244,6 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str)
|
||||
return _extract_text_from_eml(file_content)
|
||||
case ".msg":
|
||||
return _extract_text_from_msg(file_content)
|
||||
case ".vtt":
|
||||
return _extract_text_from_vtt(file_content)
|
||||
case ".properties":
|
||||
return _extract_text_from_properties(file_content)
|
||||
case _:
|
||||
|
Reference in New Issue
Block a user