diff --git a/dsLightRag/KeDaXunFei/1.xml b/dsLightRag/KeDaXunFei/1.xml new file mode 100644 index 00000000..5301e64b --- /dev/null +++ b/dsLightRag/KeDaXunFei/1.xml @@ -0,0 +1,257 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dsLightRag/KeDaXunFei/XunFeiAudioEvaluator_cn.py b/dsLightRag/KeDaXunFei/XunFeiAudioEvaluator_cn.py index 449b0863..c0f694d9 100644 --- a/dsLightRag/KeDaXunFei/XunFeiAudioEvaluator_cn.py +++ b/dsLightRag/KeDaXunFei/XunFeiAudioEvaluator_cn.py @@ -56,25 +56,75 @@ class XunFeiAudioEvaluator_cn: return self.host_url + '?' + urlencode(dict_data) + def parse_evaluation_results(self, xml_content): + """解析评测结果XML并提得分信息(整合版)""" + try: + root = ET.fromstring(xml_content) + + # 查找包含评分数据的节点(优先read_chapter,兼容read_sentence) + score_nodes = root.findall('.//read_chapter') + root.findall('.//read_sentence') + if not score_nodes: + print("未找到评分节点") + return + + # 优先使用第一个包含total_score的节点 + target_node = None + for node in score_nodes: + if 'total_score' in node.attrib: + target_node = node + break + + if not target_node: + print("未找到包含评分数据的有效节点") + return + + # 提取核心评分字段 + self.evaluation_results = { + 'total_score': float(target_node.get('total_score', 0)), + 'accuracy_score': float(target_node.get('accuracy_score', 0)), + 'fluency_score': float(target_node.get('fluency_score', 0)), + 'integrity_score': float(target_node.get('integrity_score', 0)), + 'tone_score': float(target_node.get('tone_score', 0)), + 'phone_score': float(target_node.get('phone_score', 0)), + 'emotion_score': float(target_node.get('emotion_score', 0)), + 'is_rejected': target_node.get('is_rejected', 'false') == 'true' + } + + # 提取句子级得分 + sentences = [] + for sent in target_node.findall('.//sentence'): + sentences.append({ + 'content': sent.get('content', ''), + 'total_score': float(sent.get('total_score', 0)), + 'fluency_score': float(sent.get('fluency_score', 0)) + }) + self.evaluation_results['sentences'] = sentences + + print(f"解析成功,提取到{len(self.evaluation_results)}个评分字段") + + except ET.ParseError as e: + print(f"XML解析失败: {e}, 内容: {xml_content[:200]}") + self.evaluation_results = {'error': f'解析失败: {str(e)}'} + def on_message(self, ws, message): """WebSocket消息处理""" print(f"Received message: {message}") try: data = json.loads(message) if data.get('code') != 0: - self.results = {'error': data.get('message', 'Unknown error')} + self.evaluation_results = {'error': data.get('message', 'Unknown error')} return - # 修复:移除状态码为2的硬性检查,只要有数据就解析 inner_data = data.get('data', {}) xml_b64 = inner_data.get('data', '') if xml_b64: xml_data = base64.b64decode(xml_b64) xml_content = xml_data.decode("utf-8") - self.parse_evaluation_results(xml_content) # 确保中文也能进入解析 + self.parse_evaluation_results(xml_content) # 直接调用整合后的解析方法 + ws.close() except Exception as e: print(f"Error processing message: {e}") - self.results = {'error': str(e)} + self.evaluation_results = {'error': str(e)} # 安全获取data字段 response_data = data.get("data", {}) @@ -164,81 +214,6 @@ class XunFeiAudioEvaluator_cn: total_sent += len(buffer) time.sleep(0.04) - def parse_evaluation_results(self, xml_content): - """解析中文评测XML结果(仅处理read_sentence节点)""" - try: - root = ET.fromstring(xml_content) - # 中文专用:查找read_chapter节点(原read_sentence) - read_chapter = root.find('.//read_chapter') - if read_chapter is not None: - # 中文评分字段映射(修正节点名称和属性) - self.evaluation_results = { - 'total_score': float(read_chapter.get('total_score', 0)), - 'accuracy_score': float(read_chapter.get('accuracy_score', 0)), - 'fluency_score': float(read_chapter.get('fluency_score', 0)), - 'integrity_score': float(read_chapter.get('integrity_score', 0)), # 修正字段名 - 'tone_score': float(read_chapter.get('tone_score', 0)), - 'is_rejected': read_chapter.get('is_rejected', 'false') == 'true' - } - # 提取句子级得分 - sentences = [] - for sent in read_chapter.findall('.//sentence'): - sentences.append({ - 'content': sent.get('content', ''), - 'total_score': float(sent.get('total_score', 0)), - 'fluency_score': float(sent.get('fluency_score', 0)) - }) - self.evaluation_results['sentences'] = sentences - print(f"中文评测结果解析成功: {self.evaluation_results}") - else: - print(f"未找到中文评分节点: {xml_content[:200]}") # 打印前200字符调试 - self.evaluation_results = {'error': '未找到read_sentence节点'} - except ET.ParseError as e: - print(f"XML解析失败: {e}, 内容: {xml_content[:200]}") - self.evaluation_results = {'error': f'解析失败: {str(e)}'} - """解析评测结果XML并提得分信息""" - try: - print(xml_content) - root = ET.fromstring(xml_content) - - # 查找read_chapter节点 - read_chapter = root.find('.//read_chapter') - if read_chapter is not None: - # 保持字段名一致,使用completeness_score - self.evaluation_results = { - 'accuracy_score': float(read_chapter.get('accuracy_score', 0)), - 'fluency_score': float(read_chapter.get('fluency_score', 0)), - 'completeness_score': float(read_chapter.get('integrity_score', 0)), - 'standard_score': float(read_chapter.get('standard_score', 0)), - 'total_score': float(read_chapter.get('total_score', 0)), - 'word_count': int(read_chapter.get('word_count', 0)), - 'is_rejected': read_chapter.get('is_rejected', 'false') == 'true' - } - - # 提取句子级别得分 - sentence = read_chapter.find('.//sentence') - if sentence is not None: - self.evaluation_results['sentence'] = { - 'accuracy_score': float(sentence.get('accuracy_score', 0)), - 'fluency_score': float(sentence.get('fluency_score', 0)), - 'total_score': float(sentence.get('total_score', 0)) - } - - # 提取单词级别得分 - words = [] - for word in read_chapter.findall('.//word'): - word_data = { - 'content': word.get('content', ''), - 'total_score': float(word.get('total_score', 0)), - 'dp_message': int(word.get('dp_message', 0)) - } - words.append(word_data) - - self.evaluation_results['words'] = words - - except ET.ParseError as e: - print(f"XML解析错误: {e}") - def get_evaluation_summary(self): """获取评测结果摘要""" if not self.evaluation_results: diff --git a/dsLightRag/KeDaXunFei/__pycache__/XunFeiAudioEvaluator_cn.cpython-310.pyc b/dsLightRag/KeDaXunFei/__pycache__/XunFeiAudioEvaluator_cn.cpython-310.pyc index 9000e793..139cee2f 100644 Binary files a/dsLightRag/KeDaXunFei/__pycache__/XunFeiAudioEvaluator_cn.cpython-310.pyc and b/dsLightRag/KeDaXunFei/__pycache__/XunFeiAudioEvaluator_cn.cpython-310.pyc differ diff --git a/dsLightRag/KeDaXunFei/getCnScore.py b/dsLightRag/KeDaXunFei/getCnScore.py new file mode 100644 index 00000000..a1a3e956 --- /dev/null +++ b/dsLightRag/KeDaXunFei/getCnScore.py @@ -0,0 +1,28 @@ +import os +from XunFeiAudioEvaluator_cn import XunFeiAudioEvaluator_cn +import xml.etree.ElementTree as ET + +if __name__ == "__main__": + log_path = r"1.xml" + evaluator = XunFeiAudioEvaluator_cn(None, None, None, None, "") # 使用空字符串代替None作为txt参数 + + if os.path.exists(log_path): + try: + # 读取XML文件内容 + with open(log_path, 'r', encoding='utf-8') as f: + xml_content = f.read() + + # 使用XunFeiAudioEvaluator_cn类的解析方法 + evaluator.parse_evaluation_results(xml_content) + results = evaluator.evaluation_results + + if results and 'total_score' in results: + print("解析成功!以下是评分数据:") + for key, value in results.items(): + print(f"{key}: {value}") + else: + print("未找到有效的评分数据") + except Exception as e: + print(f"解析日志时出错:{str(e)}") + else: + print(f"文件不存在:{log_path}") \ No newline at end of file diff --git a/dsLightRag/Routes/XunFeiRoute_en.py b/dsLightRag/Routes/XunFeiRoute_en.py index d81de303..70c7fbbc 100644 --- a/dsLightRag/Routes/XunFeiRoute_en.py +++ b/dsLightRag/Routes/XunFeiRoute_en.py @@ -39,16 +39,17 @@ async def save_audio(audio: UploadFile = File(...), txt: str = Form(...), langua # 添加wav转mp3功能 import subprocess - mp3_temp_file = os.path.join(temp_dir, f"temp_{uuid.uuid4().hex}.mp3") + mp3_file_path = os.path.join(temp_dir, f"temp_{uuid.uuid4().hex}.mp3") try: # 使用ffmpeg将wav转换为mp3 subprocess.run( - ["ffmpeg", "-i", temp_file, "-y", mp3_temp_file], + ["ffmpeg", "-i", temp_file, "-ar", "16000", "-ac", "1", "-acodec", "libmp3lame", "-b:a", "128k", "-y", + mp3_file_path], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) - logger.info(f"已将wav转换为mp3: {mp3_temp_file}") + logger.info(f"已将wav转换为mp3: {mp3_file_path}") except subprocess.CalledProcessError as e: logger.error(f"ffmpeg转换失败: {e.stderr.decode()}") raise Exception(f"音频格式转换失败: {e.stderr.decode()}") @@ -59,7 +60,7 @@ async def save_audio(audio: UploadFile = File(...), txt: str = Form(...), langua appid=XF_APPID, api_key=XF_APIKEY, api_secret=XF_APISECRET, - audio_file=mp3_temp_file, + audio_file=mp3_file_path, language=language, # 使用动态参数 txt=txt ) diff --git a/dsLightRag/Routes/__pycache__/XunFeiRoute_cn.cpython-310.pyc b/dsLightRag/Routes/__pycache__/XunFeiRoute_cn.cpython-310.pyc index 60e6060b..879864fb 100644 Binary files a/dsLightRag/Routes/__pycache__/XunFeiRoute_cn.cpython-310.pyc and b/dsLightRag/Routes/__pycache__/XunFeiRoute_cn.cpython-310.pyc differ diff --git a/dsLightRag/static/audio/audio_f560366edc7d462890d3f897a1c9e6ab.mp3 b/dsLightRag/static/audio/audio_f560366edc7d462890d3f897a1c9e6ab.mp3 new file mode 100644 index 00000000..66b6b4a2 Binary files /dev/null and b/dsLightRag/static/audio/audio_f560366edc7d462890d3f897a1c9e6ab.mp3 differ diff --git a/dsLightRag/static/audio/audio_f560366edc7d462890d3f897a1c9e6ab.wav b/dsLightRag/static/audio/audio_f560366edc7d462890d3f897a1c9e6ab.wav new file mode 100644 index 00000000..2f284028 Binary files /dev/null and b/dsLightRag/static/audio/audio_f560366edc7d462890d3f897a1c9e6ab.wav differ diff --git a/dsLightRag/static/js/main.js b/dsLightRag/static/js/main.js index b6a49a18..afd77d9c 100644 --- a/dsLightRag/static/js/main.js +++ b/dsLightRag/static/js/main.js @@ -83,4 +83,9 @@ document.addEventListener('DOMContentLoaded', function() { // 初始化轮播 initCarousel(); -}); \ No newline at end of file +}); + +// 修复前可能存在字段名不匹配问题 +$(".accuracy-score").text(response.evaluation.accuracy_score.toFixed(4)); +$(".fluency-score").text(response.evaluation.fluency_score.toFixed(4)); +$(".total-score").text(response.evaluation.total_score.toFixed(4)); \ No newline at end of file