'commit'

2025-09-06 10:29:48 +08:00
parent a12373b6d5
commit 4953e8712b
29 changed files with 65 additions and 44 deletions
--- a/dsLightRag/KeDaXunFei/XunFeiAudioEvaluator_cn.py
+++ b/dsLightRag/KeDaXunFei/XunFeiAudioEvaluator_cn.py
@@ -17,18 +17,16 @@ from Config.Config import XF_APPID, XF_APISECRET, XF_APIKEY


 class XunFeiAudioEvaluator_cn:
-    """讯飞语音评测类"""
-
-    def __init__(self, appid, api_key, api_secret, audio_file, language, txt):
+    """讯飞语音评测类（中文专用）"""
+    def __init__(self, appid, api_key, api_secret, audio_file, txt):
        self.appid = appid
        self.api_key = api_key
        self.api_secret = api_secret
        self.audio_file = audio_file
-        self.language = language
+        self.txt = txt.strip()  # 确保文本预处理
        self.host_url = "wss://ise-api.xfyun.cn/v2/open-ise"
+        self.evaluation_results = {}  # 统一结果存储变量
        self.websocket_url = ""
-        self.evaluation_results = {}
-        self.txt = txt

    def generate_auth_url(self):
        """生成鉴权URL"""
@@ -98,10 +96,10 @@ class XunFeiAudioEvaluator_cn:
        print(f"WebSocket connection closed,{ws}")

    def on_open(self, ws):
-        """连接建立处理"""
+        """连接建立处理（中文专用配置）"""
        print(f"WebSocket connection opened,{ws},ws连接建立成功...")

-        # 发送初始参数
+        # 发送初始参数（中文专用配置）
        send_dict = {
            "common": {
                "app_id": self.appid
@@ -110,13 +108,14 @@ class XunFeiAudioEvaluator_cn:
                "category": "read_sentence",
                "rstcd": "utf8",
                "sub": "ise",
-                "group": "pupil",
-                "ent": "cn_vip" if self.language == "chinese" else "en_vip",
+                "group": "pupil",  # 根据文档设置为小学组
+                "ent": "cn_vip",  # 固定中文引擎
                "tte": "utf-8",
                "cmd": "ssb",
-                "auf": "audio/L16;rate=16000",
-                "aue": "lame",
-                "text": '\uFEFF' + "[content]\n" + self.txt
+                "auf": "audio/L16;rate=16000",  # 16-bit linear PCM at 16 kHz (value unchanged by this commit)
+                "aue": "raw",  # 中文推荐使用raw格式
+                "text": '\uFEFF' + "[content]\n" + self.txt,  # 确保UTF8 BOM
+                "extra_ability": "multi_dimension"  # 启用多维度评分
            },
            "data": {
                "status": 0,
@@ -167,21 +166,38 @@ class XunFeiAudioEvaluator_cn:
                time.sleep(0.04)

    def parse_evaluation_results(self, xml_content):
-        root = ET.fromstring(xml_content)
-        # 兼容中英文不同的根节点
-        read_node = root.find('.//read_sentence') or root.find('.//read_chapter')
-        if read_node is not None:
-            self.results = {
-                'total_score': read_node.attrib.get('total_score', '0'),
-                'accuracy': read_node.attrib.get('accuracy_score', '0'),
-                'fluency': read_node.attrib.get('fluency_score', '0'),
-                'integrity': read_node.attrib.get('integrity_score', '0')
-            }
-        else:
-            # 打印未匹配的XML结构用于调试
-            print(f"No evaluation nodes found in XML: {xml_content}")
-            self.results = {'error': '未找到有效评分节点'}
-        """解析评测结果XML并提取得分信息"""
+        """解析中文评测XML结果（仅处理read_sentence节点）"""
+        try:
+            root = ET.fromstring(xml_content)
+            # 中文专用：仅查找read_sentence节点
+            read_sentence = root.find('.//read_sentence')
+            if read_sentence is not None:
+                # 中文评分字段映射
+                self.evaluation_results = {
+                    'total_score': float(read_sentence.get('total_score', 0)),
+                    'accuracy_score': float(read_sentence.get('accuracy_score', 0)),
+                    'fluency_score': float(read_sentence.get('fluency_score', 0)),
+                    'integrity_score': float(read_sentence.get('integrity_score', 0)),
+                    'tone_score': float(read_sentence.get('tone_score', 0)),  # 中文特有调型分
+                    'is_rejected': read_sentence.get('is_rejected', 'false') == 'true'
+                }
+                # 提取句子级得分（中文特有）
+                sentences = []
+                for sent in read_sentence.findall('.//sentence'):
+                    sentences.append({
+                        'content': sent.get('content', ''),
+                        'total_score': float(sent.get('total_score', 0)),
+                        'fluency_score': float(sent.get('fluency_score', 0))
+                    })
+                self.evaluation_results['sentences'] = sentences
+                print(f"中文评测结果解析成功: {self.evaluation_results}")
+            else:
+                print(f"未找到中文评分节点: {xml_content[:200]}")  # 打印前200字符调试
+                self.evaluation_results = {'error': '未找到read_sentence节点'}
+        except ET.ParseError as e:
+            print(f"XML解析失败: {e}, 内容: {xml_content[:200]}")
+            self.evaluation_results = {'error': f'解析失败: {str(e)}'}
+        """解析评测结果XML并提取得分信息"""
        try:
            print(xml_content)
            root = ET.fromstring(xml_content)
@@ -266,7 +282,7 @@ class XunFeiAudioEvaluator_cn:
        return descriptions.get(dp_message, f"未知({dp_message})")

    def run_evaluation(self):
-        """运行评测"""
+        """运行评测（修复SSL配置）"""
        start_time = datetime.now()
        websocket.enableTrace(False)

@@ -279,7 +295,12 @@ class XunFeiAudioEvaluator_cn:
            on_open=self.on_open
        )

-        ws_entity.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
+        # 修复SSL配置，启用证书验证
+        import certifi
+        ws_entity.run_forever(sslopt={
+            "cert_reqs": ssl.CERT_REQUIRED,
+            "ca_certs": certifi.where()
+        })

        end_time = datetime.now()
        evaluation_time = end_time - start_time
@@ -290,15 +311,15 @@ class XunFeiAudioEvaluator_cn:

 # 使用示例
 if __name__ == '__main__':
-    # 配置参数
-    # appid = "5b83f8d6"
-    # api_secret = "604fa6cb9c5ab664a0d153fe0ccc6802"
-    # api_key = "5beb887923204000bfcb402046bb05a6"
+    # 中文使用示例
    appid = XF_APPID
    api_secret = XF_APISECRET
    api_key = XF_APIKEY
-    # audio_file = "./1.mp3"
-    audio_file = r'D:\dsWork\dsProject\dsLightRag\static\audio\audio_afc0a96e382c428cba2f00e3f71e4e8f.mp3'
+    audio_file = r'D:\dsWork\dsProject\dsLightRag\static\audio\test_cn.wav'  # 中文测试音频
+    txt = "大家好。很高兴认识你们。今天天气很好。我正在使用这个工具练习中文发音。"
+    evaluator = XunFeiAudioEvaluator_cn(appid, api_key, api_secret, audio_file, txt)
+    results, eval_time = evaluator.run_evaluation()
+    print(evaluator.get_evaluation_summary())
    # 创建评测器实例
    txt="Hello everyone! Nice to meet you. Today is a beautiful day. I am learning English pronunciation with this tool."
    evaluator = XunFeiAudioEvaluator_cn(appid, api_key, api_secret, audio_file, "english", txt)