This commit is contained in:
2025-09-06 17:25:13 +08:00
parent b9de5438ec
commit 074c4b9384
9 changed files with 350 additions and 84 deletions

View File

@@ -56,25 +56,75 @@ class XunFeiAudioEvaluator_cn:
return self.host_url + '?' + urlencode(dict_data)
def parse_evaluation_results(self, xml_content):
    """Parse the evaluation-result XML and store the extracted scores.

    ``read_chapter`` nodes are considered first, then ``read_sentence``
    nodes; the first one carrying a ``total_score`` attribute is used.

    On success ``self.evaluation_results`` is replaced by a dict holding the
    score attributes as floats, ``is_rejected`` as a bool and a
    ``sentences`` list with one entry per ``sentence`` descendant.
    When no usable node exists a notice is printed and
    ``self.evaluation_results`` is left untouched. Malformed XML or a
    non-numeric score attribute stores ``{'error': ...}`` instead.

    Args:
        xml_content: XML document in a form accepted by ``ET.fromstring``.
    """
    score_fields = (
        'total_score',
        'accuracy_score',
        'fluency_score',
        'integrity_score',
        'tone_score',
        'phone_score',
        'emotion_score',
    )
    try:
        root = ET.fromstring(xml_content)
        # read_chapter takes priority; read_sentence is kept for compatibility.
        score_nodes = root.findall('.//read_chapter') + root.findall('.//read_sentence')
        if not score_nodes:
            print("未找到评分节点")
            return
        # Use the first node that actually carries a total_score attribute.
        target_node = next(
            (node for node in score_nodes if 'total_score' in node.attrib),
            None,
        )
        # Compare with None explicitly: an Element without child elements is
        # falsy, so "not target_node" would discard a valid score node that
        # simply has no <sentence> children.
        if target_node is None:
            print("未找到包含评分数据的有效节点")
            return
        # Build the result locally and assign once, so a failure while
        # reading sentence scores cannot leave a half-filled dict behind.
        results = {name: float(target_node.get(name, 0)) for name in score_fields}
        results['is_rejected'] = target_node.get('is_rejected', 'false') == 'true'
        # Sentence-level scores.
        results['sentences'] = [
            {
                'content': sent.get('content', ''),
                'total_score': float(sent.get('total_score', 0)),
                'fluency_score': float(sent.get('fluency_score', 0)),
            }
            for sent in target_node.findall('.//sentence')
        ]
        self.evaluation_results = results
        print(f"解析成功,提取到{len(self.evaluation_results)}个评分字段")
    except ET.ParseError as e:
        print(f"XML解析失败: {e}, 内容: {xml_content[:200]}")
        self.evaluation_results = {'error': f'解析失败: {str(e)}'}
    except ValueError as e:
        # A score attribute that is not a number (for example an empty
        # string) is reported the same way as malformed XML.
        print(f"XML解析失败: {e}, 内容: {xml_content[:200]}")
        self.evaluation_results = {'error': f'解析失败: {str(e)}'}
def on_message(self, ws, message):
"""Handle a WebSocket message: decode the JSON envelope and parse the base64-encoded XML result it carries."""
print(f"Received message: {message}")
try:
data = json.loads(message)
if data.get('code') != 0:
# A non-zero code is treated as a failure; the server-supplied message is stored as the error.
# NOTE(review): the two assignments below look like the removed/added pair of a diff
# (self.results -> self.evaluation_results) — confirm which one the file really contains.
self.results = {'error': data.get('message', 'Unknown error')}
self.evaluation_results = {'error': data.get('message', 'Unknown error')}
return
# Fix: the hard check for status code 2 was removed; parse whenever data is present.
inner_data = data.get('data', {})
xml_b64 = inner_data.get('data', '')
if xml_b64:
xml_data = base64.b64decode(xml_b64)
xml_content = xml_data.decode("utf-8")
# NOTE(review): the parser is invoked twice in a row here; presumably one of the two
# lines is the removed side of the diff — confirm.
self.parse_evaluation_results(xml_content) # make sure Chinese results reach the parser as well
self.parse_evaluation_results(xml_content) # call the consolidated parsing method directly
ws.close()
except Exception as e:
print(f"Error processing message: {e}")
# NOTE(review): same removed/added pair pattern as in the error branch above — confirm.
self.results = {'error': str(e)}
self.evaluation_results = {'error': str(e)}
# Safely fetch the data field
# NOTE(review): the visible hunk ends here, so the rest of the method is not shown.
response_data = data.get("data", {})
@@ -164,81 +214,6 @@ class XunFeiAudioEvaluator_cn:
total_sent += len(buffer)
time.sleep(0.04)
def parse_evaluation_results(self, xml_content):
    """Parse the evaluation-result XML and store chapter, sentence and word scores.

    Only the first ``read_chapter`` node is evaluated. On success
    ``self.evaluation_results`` is replaced by a dict with the
    chapter-level scores (``integrity_score`` is exposed under the key
    ``completeness_score``), an optional ``sentence`` entry for the first
    ``sentence`` descendant and a ``words`` list with one entry per
    ``word`` descendant. If the XML is malformed or contains no
    ``read_chapter`` node, ``{'error': ...}`` is stored instead.

    The XML is parsed once; the earlier implementation parsed it twice and
    threw away the first result dict.

    Args:
        xml_content: XML document as text.
    """
    try:
        print(xml_content)
        root = ET.fromstring(xml_content)
    except ET.ParseError as e:
        print(f"XML解析失败: {e}, 内容: {xml_content[:200]}")
        self.evaluation_results = {'error': f'解析失败: {str(e)}'}
        return

    read_chapter = root.find('.//read_chapter')
    if read_chapter is None:
        # Print the first 200 characters to help debugging.
        print(f"未找到中文评分节点: {xml_content[:200]}")
        # The lookup targets read_chapter, so the message names that node
        # (it used to say read_sentence, which is not what is searched).
        self.evaluation_results = {'error': '未找到read_chapter节点'}
        return

    # The key name completeness_score is kept even though the XML
    # attribute is called integrity_score.
    results = {
        'accuracy_score': float(read_chapter.get('accuracy_score', 0)),
        'fluency_score': float(read_chapter.get('fluency_score', 0)),
        'completeness_score': float(read_chapter.get('integrity_score', 0)),
        'standard_score': float(read_chapter.get('standard_score', 0)),
        'total_score': float(read_chapter.get('total_score', 0)),
        'word_count': int(read_chapter.get('word_count', 0)),
        'is_rejected': read_chapter.get('is_rejected', 'false') == 'true',
    }

    # Sentence-level scores (first sentence only).
    sentence = read_chapter.find('.//sentence')
    if sentence is not None:
        results['sentence'] = {
            'accuracy_score': float(sentence.get('accuracy_score', 0)),
            'fluency_score': float(sentence.get('fluency_score', 0)),
            'total_score': float(sentence.get('total_score', 0)),
        }

    # Word-level scores.
    results['words'] = [
        {
            'content': word.get('content', ''),
            'total_score': float(word.get('total_score', 0)),
            'dp_message': int(word.get('dp_message', 0)),
        }
        for word in read_chapter.findall('.//word')
    ]
    self.evaluation_results = results
def get_evaluation_summary(self):
"""获取评测结果摘要"""
if not self.evaluation_results: