refactor: improve handling of leading punctuation removal (#10761)

2024-11-18 21:32:33 +08:00
parent 0ba17ec116
commit 14f3d44c37
5 changed files with 42 additions and 15 deletions
--- a/api/tests/unit_tests/utils/test_text_processing.py
+++ b/api/tests/unit_tests/utils/test_text_processing.py
@@ -0,0 +1,20 @@
+from textwrap import dedent
+
+import pytest
+
+from core.tools.utils.text_processing_utils import remove_leading_symbols
+
+
+@pytest.mark.parametrize(
+    ("input_text", "expected_output"),
+    [
+        ("...Hello, World!", "Hello, World!"),  # leading ASCII periods stripped; inner punctuation kept
+        ("。测试中文标点", "测试中文标点"),  # leading ideographic full stop (U+3002) stripped
+        ("!@#Test symbols", "Test symbols"),  # a run of mixed ASCII symbols is stripped as a whole
+        ("Hello, World!", "Hello, World!"),  # no leading symbol: text is expected back unchanged
+        ("", ""),  # empty string stays empty
+        ("   ", "   "),  # whitespace-only input is expected back unchanged
+    ],
+)
+def test_remove_leading_symbols(input_text, expected_output):  # runs once per (input, expected) pair above
+    assert remove_leading_symbols(input_text) == expected_output