You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

47 lines
1.4 KiB

7 days ago
def get_string_no_punctuation_or_emoji(s):
"""去除字符串首尾的空格、标点符号和表情符号"""
chars = list(s)
# 处理开头的字符
start = 0
while start < len(chars) and is_punctuation_or_emoji(chars[start]):
start += 1
# 处理结尾的字符
end = len(chars) - 1
while end >= start and is_punctuation_or_emoji(chars[end]):
end -= 1
return "".join(chars[start : end + 1])
def is_punctuation_or_emoji(char):
"""检查字符是否为空格、指定标点或表情符号"""
# 定义需要去除的中英文标点(包括全角/半角)
punctuation_set = {
"",
",", # 中文逗号 + 英文逗号
"",
".", # 中文句号 + 英文句号
"",
"!", # 中文感叹号 + 英文感叹号
"-",
"", # 英文连字符 + 中文全角横线
"", # 中文顿号
"[",
"]", # 方括号
"",
"", # 中文方括号
}
if char.isspace() or char in punctuation_set:
return True
# 检查表情符号(保留原有逻辑)
code_point = ord(char)
emoji_ranges = [
(0x1F600, 0x1F64F),
(0x1F300, 0x1F5FF),
(0x1F680, 0x1F6FF),
(0x1F900, 0x1F9FF),
(0x1FA70, 0x1FAFF),
(0x2600, 0x26FF),
(0x2700, 0x27BF),
]
return any(start <= code_point <= end for start, end in emoji_ranges)