refactor: improve handling of leading punctuation removal (#10761)

This commit is contained in:
Zane
2024-11-18 21:32:33 +08:00
committed by GitHub
parent 0ba17ec116
commit 14f3d44c37
5 changed files with 42 additions and 15 deletions

View File

@@ -0,0 +1,16 @@
import re
def remove_leading_symbols(text: str) -> str:
    """Strip the run of punctuation/symbol characters that starts ``text``.

    Only a contiguous prefix is removed; the first character outside the
    recognised set ends the run, and everything from there on is returned
    unchanged. The recognised set is:

    * U+2000-U+206F (General Punctuation block),
    * U+2E00-U+2E7F (Supplemental Punctuation block),
    * U+3000-U+303F (CJK Symbols and Punctuation block),
    * the ASCII punctuation characters listed literally in the pattern.

    Args:
        text (str): The input text to process.

    Returns:
        str: ``text`` without its leading punctuation/symbol prefix, or
        ``text`` itself when it does not start with such a character.
    """
    # Character class of everything treated as a "leading symbol".
    pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,\-./:;<=>?@\[\]^_`{|}~]+"

    # The pattern is anchored at the start, so a single match attempt at
    # position 0 is all that is needed.
    prefix = re.match(pattern, text)
    if prefix is None:
        return text

    # Drop the matched prefix and keep the remainder verbatim.
    return text[prefix.end():]