You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

47 lines
1.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

def get_string_no_punctuation_or_emoji(s):
    """Strip whitespace, punctuation and emoji from both ends of a string.

    Characters are dropped from the start and from the end for as long as
    ``is_punctuation_or_emoji`` reports them as removable. Characters in the
    interior of the string are never touched.

    Args:
        s: The text to trim.

    Returns:
        The trimmed text; an empty string when every character is removable.
    """
    symbols = list(s)
    total = len(symbols)

    # Index of the first character to keep (``total`` when there is none).
    first_kept = next(
        (
            index
            for index, symbol in enumerate(symbols)
            if not is_punctuation_or_emoji(symbol)
        ),
        total,
    )

    # Exclusive end of the kept span. The scan walks backwards from the end
    # and never crosses ``first_kept``, so no character is examined twice
    # once the leading run has been skipped.
    stop = total
    while stop > first_kept and is_punctuation_or_emoji(symbols[stop - 1]):
        stop -= 1

    return "".join(symbols[first_kept:stop])
# Punctuation (half-width and full-width) that counts as removable.
# The full-width entries are written as escapes so they survive tooling that
# drops or flags ambiguous Unicode characters: an empty-string literal in
# their place can never equal a single character and silently disables the
# entry.
# NOTE(review): the full-width code points were restored from the original
# per-entry comments (Chinese comma, full stop, exclamation mark, full-width
# dash, enumeration comma, Chinese brackets) -- confirm against upstream.
_PUNCTUATION_CHARS = frozenset(
    {
        ",",  # ASCII comma
        "\uff0c",  # full-width (Chinese) comma
        ".",  # ASCII full stop
        "\u3002",  # ideographic (Chinese) full stop
        "!",  # ASCII exclamation mark
        "\uff01",  # full-width (Chinese) exclamation mark
        "-",  # ASCII hyphen-minus
        "\uff0d",  # full-width hyphen-minus
        "\u3001",  # ideographic (Chinese) enumeration comma
        "[",
        "]",  # ASCII square brackets
        "\u3010",
        "\u3011",  # Chinese lenticular brackets
    }
)

# Inclusive (first, last) code-point ranges that are treated as emoji.
_EMOJI_RANGES = (
    (0x1F600, 0x1F64F),  # Emoticons
    (0x1F300, 0x1F5FF),  # Miscellaneous Symbols and Pictographs
    (0x1F680, 0x1F6FF),  # Transport and Map Symbols
    (0x1F900, 0x1F9FF),  # Supplemental Symbols and Pictographs
    (0x1FA70, 0x1FAFF),  # Symbols and Pictographs Extended-A
    (0x2600, 0x26FF),  # Miscellaneous Symbols
    (0x2700, 0x27BF),  # Dingbats
)


def is_punctuation_or_emoji(char):
    """Return whether *char* is whitespace, listed punctuation, or an emoji.

    Args:
        char: A single-character string.

    Returns:
        True if ``char.isspace()`` is true, if ``char`` is one of the
        punctuation marks in ``_PUNCTUATION_CHARS``, or if its code point
        lies in one of ``_EMOJI_RANGES``; False otherwise.

    Raises:
        TypeError: Raised by ``ord`` when *char* is not exactly one
            character long and was not already accepted by the whitespace
            or punctuation check.
    """
    if char.isspace() or char in _PUNCTUATION_CHARS:
        return True

    # Anything else is removable only if it falls in an emoji range.
    code_point = ord(char)
    return any(first <= code_point <= last for first, last in _EMOJI_RANGES)