Coerce Excel column labels to str before joining their lines.

Column labels taken from df.columns are not always strings (the added test
uses the labels 1 and 1.1); calling .splitlines() on a non-string label
fails, so each label is wrapped in str() first. A unit test covering numeric
column labels is added alongside the change.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. The leading indentation of the context lines in node.py and the
position of the blank context line in the test hunk were reconstructed;
confirm both against the target files before applying.

diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py
index 9f48b4886..8e6150f9c 100644
--- a/api/core/workflow/nodes/document_extractor/node.py
+++ b/api/core/workflow/nodes/document_extractor/node.py
@@ -451,7 +451,7 @@ def _extract_text_from_excel(file_content: bytes) -> str:
                 df = df.applymap(lambda x: " ".join(str(x).splitlines()) if isinstance(x, str) else x)  # type: ignore
 
                 # Combine multi-line text in column names into a single line
-                df.columns = pd.Index([" ".join(col.splitlines()) for col in df.columns])
+                df.columns = pd.Index([" ".join(str(col).splitlines()) for col in df.columns])
 
                 # Manually construct the Markdown table
                 markdown_table += _construct_markdown_table(df) + "\n\n"
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
index 76bb640d1..66c7818ad 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py
@@ -342,3 +342,26 @@ def test_extract_text_from_excel_all_sheets_fail(mock_excel_file):
     assert result == ""
 
     assert mock_excel_instance.parse.call_count == 2
+
+
+@patch("pandas.ExcelFile")
+def test_extract_text_from_excel_numeric_type_column(mock_excel_file):
+    """Test extracting text from Excel file with numeric column names."""
+
+    # Test numeric type column
+    data = {1: ["Test"], 1.1: ["Test"]}
+
+    df = pd.DataFrame(data)
+
+    # Mock ExcelFile
+    mock_excel_instance = Mock()
+    mock_excel_instance.sheet_names = ["Sheet1"]
+    mock_excel_instance.parse.return_value = df
+    mock_excel_file.return_value = mock_excel_instance
+
+    file_content = b"fake_excel_content"
+    result = _extract_text_from_excel(file_content)
+
+    expected_manual = "| 1.0 | 1.1 |\n| --- | --- |\n| Test | Test |\n\n"
+
+    assert expected_manual == result