feat: document extractor chardet encoding (#20269)

Signed-off-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
-LAN-
2025-05-27 13:27:46 +08:00
committed by GitHub
parent 756f35f480
commit 9c9d3d7bd0
2 changed files with 63 additions and 11 deletions

View File

@@ -150,7 +150,7 @@ def test_extract_text_from_plain_text_non_utf8():
temp_file.write(non_utf8_content)
temp_file.seek(0)
text = _extract_text_from_plain_text(temp_file.read())
-assert text == "Hello, world."
+assert text == "Hello, world©."
@patch("pypdfium2.PdfDocument")