'commit'

5 months ago · 06de3f0f74
parent 8c5118effd
commit 06de3f0f74
7 changed files with 175 additions and 0 deletions
--- a/ppt-generator-master/.idea/vcs.xml
+++ b/ppt-generator-master/.idea/vcs.xml
@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
 </project>
--- a/ppt-generator-master/ALiYun.py
+++ b/ppt-generator-master/ALiYun.py
@ -0,0 +1,38 @@
+import os
+from openai import OpenAI
+# https://help.aliyun.com/zh/model-studio/developer-reference/deepseek?spm=a2c4g.11186623.0.0.274b1d1c4GY0Zd
+from pathlib import Path
+
+# Take the key from the environment so the secret is not committed with the code.
+API_KEY = os.environ["DASHSCOPE_API_KEY"]
+client = OpenAI(
+    api_key=API_KEY,  # how to get a key: https://help.aliyun.com/zh/model-studio/developer-reference/get-api-key
+    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
+)
+
+# Read the transcript text that will be appended to the prompt.
+file_path = Path(r"D:\dsWork\QingLong\音频文本.txt")
+
+if file_path.exists():
+    # The file is decoded explicitly as UTF-8.
+    content = file_path.read_text(encoding='utf-8')
+else:
+    print(f"文件 {file_path} 不存在")
+    exit(1)  # a missing input is a failure, so report a non-zero status
+
+# Send request.
+
+completion = client.chat.completions.create(
+    model="deepseek-v3",  # another model name offered by the service can be substituted here
+    messages=[
+        {'role': 'user', 'content': "帮我梳理：这节课分了几个部分，每部分的名称和开始的时间是多少："+content}
+    ]
+)
+
+# Printing the reasoning trace via the reasoning_content field is disabled.
+#print("思考过程：")
+#print(completion.choices[0].message.reasoning_content)
+
+# Print the final answer from the content field.
+print("最终答案：")
+print(completion.choices[0].message.content)
--- a/ppt-generator-master/HuaWeiApi.py
+++ b/ppt-generator-master/HuaWeiApi.py
@ -0,0 +1,47 @@
+
+# coding=utf-8
+
+import requests
+import json
+
+from pathlib import Path
+
+if __name__ == '__main__':
+    import os  # function-scope import; used only to read the API key
+    url = "https://infer-modelarts-cn-southwest-2.modelarts-infer.com/v1/infers/fd53915b-8935-48fe-be70-449d76c0fc87/v1/chat/completions"
+    API_KEY = os.environ['MODELARTS_API_KEY']  # secret comes from the environment, not the source
+
+    # Read the transcript text that will be appended to the prompt.
+    file_path = Path(r"D:\dsWork\QingLong\音频文本.txt")
+
+    if file_path.exists():
+        # The file is decoded explicitly as UTF-8.
+        content = file_path.read_text(encoding='utf-8')
+    else:
+        print(f"文件 {file_path} 不存在")
+        exit(1)  # a missing input is a failure, so report a non-zero status
+
+    # Send request.
+    headers = {
+        'Content-Type': 'application/json',
+        'Authorization': 'Bearer '+API_KEY
+    }
+    data = {
+        "model": "DeepSeek-V3",
+        "max_tokens": 20,  # NOTE(review): caps the reply at 20 tokens - probably too short for this prompt; confirm
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "整理下面的JSON文件内容，输出这段话共分几部分，都是哪个时间开始的："+content}
+        ],
+        # Whether to enable streaming inference; False (the default) disables it.
+        "stream": False,
+        # Whether to report token usage while streaming; only takes effect when stream is True.
+        # "stream_options": { "include_usage": True },
+        # Sampling randomness: lower is more deterministic, higher more creative; "0" means greedy sampling. Default 1.0.
+        "temperature": 1.0
+    }
+    resp = requests.post(url, headers=headers, data=json.dumps(data))  # TLS verification stays on (no verify=False)
+
+    # Print result.
+    print(resp.status_code)
+    print(resp.text)
--- a/ppt-generator-master/ZhengLiWenDang.py
+++ b/ppt-generator-master/ZhengLiWenDang.py
@ -0,0 +1,20 @@
+import json
+
+# Load the transcript; the .txt file is expected to contain JSON.
+file_path = r'D:\dsWork\QingLong\音频文本.txt'  # replace with your own file path
+with open(file_path, 'r', encoding='utf-8') as file:
+    # json.load reads the whole stream and parses it in one step.
+    data = json.load(file)
+
+# Concatenate the 'Text' field of every entry, in order, separated by single
+# spaces, to form one continuous manuscript.
+manuscript = ' '.join(
+    item['Text'] for item in data
+)
+
+# Show the manuscript on the console.
+print(manuscript)
+
+# Also save it to a file in the current working directory.
+with open('文稿.txt', 'w', encoding='utf-8') as output_file:
+    output_file.write(manuscript)
--- a/ppt-generator-master/uploadQWenFile.py
+++ b/ppt-generator-master/uploadQWenFile.py
@ -0,0 +1,37 @@
+import os
+from pathlib import Path
+from openai import OpenAI
+
+# pip install -U openai
+API_KEY = os.environ["DASHSCOPE_API_KEY"]  # read from the environment; never commit the key
+client = OpenAI(
+    api_key=API_KEY,
+    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",  # DashScope service base_url
+)
+
+# Upload the transcript; the raw string keeps the Windows backslashes literal.
+file_object = client.files.create(file=Path(r"D:\dsWork\QingLong\音频文本.txt"), purpose="file-extract")
+print(file_object.id)
+file_id = file_object.id
+
+# Ask the model to segment the uploaded file, referenced through a fileid:// system message.
+completion = client.chat.completions.create(
+    model="qwen-long",
+    messages=[
+        {'role': 'system', 'content': 'You are a helpful assistant.'},
+        {'role': 'system', 'content': 'fileid://' + file_id},
+        {'role': 'user', 'content': '不要使用markdown格式输出，只输出原文文字，你的任务是按语义进行分段。'}
+    ],
+    stream=True,
+    stream_options={"include_usage": True}
+)
+
+pieces = []
+for chunk in completion:
+    # Skip chunks that carry no choices or no text delta.
+    if chunk.choices and chunk.choices[0].delta.content:
+        pieces.append(chunk.choices[0].delta.content)
+full_content = "".join(pieces)
+
+# Print the assembled reply.
+print(full_content)
--- a/ppt-generator-master/总结.txt
+++ b/ppt-generator-master/总结.txt
@ -0,0 +1,31 @@
+1、DeepSeek官方的API提供了文件上传并让大模型阅读文件的能力，
+其它第三方，比如阿里云、华为云都阉割了这一功能。
+这个功能的意义很大，比如：我需要提供三篇优质的教案让大模型阅读，并依照这三篇教案的
+内容写一份新的教案，但三篇教案的字符总数可能在 50K 左右，直接放进 prompt 是行不通的。
+
+（以下为 DeepSeek-V3 的自述，具体限制请以官方文档为准）作为 DeepSeek-V3，我的上下文窗口支持 128K tokens，这意味着我可以处理非常长的输入和输出。具体来说：
+
+输入限制：单次输入的 prompt 最长可以支持 128K tokens。
+
+输出限制：单次输出的内容最长也可以支持 128K tokens。
+
+关于 Tokens 的说明：
+1 token 大约等于 0.75 个英文单词，或者 1.5 个中文字符。
+
+128K tokens 大约相当于：
+
+英文：96,000 个单词。
+
+中文：192,000 个汉字。
+
+适用场景：
+长文档处理：可以一次性输入很长的文本（如书籍、论文、报告等）进行分析或总结。
+
+复杂任务：可以处理需要大量上下文的任务，例如长代码分析、多轮对话、复杂逻辑推理等。
+
+生成长内容：可以生成较长的文章、故事、代码等。
+
+注意事项：
+虽然我的上下文窗口很大，但过长的输入可能会导致处理速度变慢。
+
+如果输入内容超过 128K tokens，需要分段处理。
--- a/ppt-generator-master/文稿.txt
+++ b/ppt-generator-master/文稿.txt