You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

159 lines
6.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
from pathlib import Path
from typing import Optional, Tuple
from openai import OpenAI, APIError, APITimeoutError
import time
import httpx
class ContentAnalyzer:
    """Course-content analyzer built on an OpenAI-compatible chat API (long-timeout variant).

    The analyzer sends a text (for example a lecture transcript) to the
    configured chat model together with a prompt template and returns the
    model's answer.  Every public method reports its outcome as a
    ``(success, text)`` tuple instead of raising: on success ``text`` is the
    model output, on failure it is a human-readable error message.
    Progress is printed to stdout via ``_show_progress``.
    """

    # Upper bound, in seconds, for the exponential back-off between timeout
    # retries.  Without a cap, 2 ** attempt reaches 512 s at the default
    # max_retries of 10.
    MAX_RETRY_DELAY = 60

    def __init__(
        self,
        # NOTE(security): a credential is hard-coded as the default value.
        # It is kept only so existing callers keep working; rotate this key
        # and pass ``api_key`` explicitly (e.g. read from an environment
        # variable) instead of relying on the default.
        api_key: str = "sk-01d13a39e09844038322108ecdbd1bbc",
        base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
        model: str = "deepseek-v3",
        max_retries: int = 10,
        initial_timeout: int = 120,  # initial per-request timeout, in seconds
    ):
        """Create the API client and verify connectivity.

        Args:
            api_key: Key passed to the ``OpenAI`` client.
            base_url: Base URL of the OpenAI-compatible endpoint.
            model: Model name sent with every chat-completion request.
            max_retries: Number of retries after the first attempt when a
                request times out (so up to ``max_retries + 1`` attempts).
            initial_timeout: Timeout in seconds for the first attempt; each
                further attempt adds 5 seconds.

        Raises:
            Exception: Whatever the connectivity probe in ``_check_network``
                raised; it is logged and re-raised unchanged.
        """
        self._show_progress("🔧", "初始化分析器...", level=0)
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.model = model
        self.max_retries = max_retries
        self.initial_timeout = initial_timeout
        self._check_network()
        self._show_progress("", "分析器准备就绪", level=0)

    def _show_progress(self, emoji: str, message: str, level: int = 1):
        """Print one timestamped progress line, prefixed by ``level`` indent units."""
        indent = " " * level
        timestamp = time.strftime("%H:%M:%S")
        print(f"{indent}{emoji} [{timestamp}] {message}")

    def _check_network(self):
        """Probe the DashScope host with a GET request; log and re-raise on failure."""
        try:
            # The connectivity probe is allowed up to 30 seconds.
            with httpx.Client(timeout=30) as client:
                client.get("https://dashscope.aliyuncs.com")
            self._show_progress("🌐", "网络连接正常", level=1)
        except Exception as e:
            self._show_progress("", f"网络异常: {str(e)}", level=1)
            raise

    def analyze_content(
        self,
        content: str,
        prompt_template: str = "帮我梳理:这节课分了几个部分,每部分的名称和开始的时间是多少:{}"
    ) -> Tuple[bool, str]:
        """Send ``content`` to the chat model and return its answer.

        Args:
            content: Text to analyze; substituted into ``prompt_template``.
            prompt_template: ``str.format`` template with one positional
                ``{}`` placeholder that receives ``content``.

        Returns:
            ``(True, answer)`` when the model returned text, otherwise
            ``(False, error_message)``.  Timeouts are retried up to
            ``max_retries`` times with capped exponential back-off; any other
            error ends the call immediately.
        """
        total_attempts = self.max_retries + 1
        for attempt in range(total_attempts):
            try:
                # The timeout grows by 5 s per attempt:
                # initial_timeout, initial_timeout + 5, initial_timeout + 10, ...
                current_timeout = self.initial_timeout + attempt * 5
                # Show the real number of attempts (first try + retries) so
                # the counter never reads e.g. "11/10".
                self._show_progress(
                    "⏱️",
                    f"尝试 {attempt + 1}/{total_attempts} (超时: {current_timeout}s)"
                )
                full_prompt = prompt_template.format(content)
                completion = self.client.chat.completions.create(
                    model=self.model,
                    messages=[{'role': 'user', 'content': full_prompt}],
                    timeout=current_timeout
                )
                if not completion.choices:
                    self._show_progress("⚠️", "API返回空响应", level=2)
                    return False, "API响应中未包含有效结果"
                answer = completion.choices[0].message.content
                if answer is None:
                    # A choice without text is not a usable result; reporting
                    # success with None would break the (bool, str) contract
                    # and make a later write_text() fail.
                    self._show_progress("⚠️", "API返回空响应", level=2)
                    return False, "API响应中未包含有效结果"
                self._show_progress("", "请求成功", level=2)
                return True, answer
            except APITimeoutError:
                if attempt < self.max_retries:
                    # Exponential back-off, capped so late retries do not
                    # sleep for many minutes.
                    delay = min(2 ** attempt, self.MAX_RETRY_DELAY)
                    self._show_progress("", f"{delay}s后重试...", level=2)
                    time.sleep(delay)
                else:
                    self._show_progress("", "超过最大重试次数", level=2)
                    return False, f"API请求超时已重试{self.max_retries}"
            except APIError as e:
                self._show_progress("🚨", f"API错误: {str(e)}", level=2)
                return False, f"API调用失败: {str(e)}"
            except Exception as e:
                self._show_progress("💥", f"意外错误: {str(e)}", level=2)
                return False, f"未处理的异常: {str(e)}"
        # Only reached when the loop body never ran (negative max_retries).
        return False, "未知错误"

    def analyze_file(
        self,
        file_path: Path,
        output_path: Optional[Path] = None,
        encoding: str = 'utf-8'
    ) -> Tuple[bool, str]:
        """Run the whole pipeline for one file: validate, read, analyze, save.

        Args:
            file_path: Text file to analyze.
            output_path: Where to write the result; nothing is written when
                it is ``None``.  Missing parent directories are created.
            encoding: Encoding used to read the input and write the output.
                If decoding the input fails, GBK is tried once.

        Returns:
            ``(True, answer)`` on success, otherwise ``(False, error_message)``.
            No exception escapes this method.
        """
        try:
            self._show_progress("📂", f"开始处理文件: {file_path}", level=0)

            # Validate the input file.
            self._show_progress("🔍", "验证文件...", level=1)
            if not file_path.exists():
                self._show_progress("", "文件不存在", level=2)
                return False, f"文件不存在: {file_path}"
            if file_path.stat().st_size > 10 * 1024 * 1024:  # warn above 10 MB
                self._show_progress("⚠️", "注意:大文件可能影响处理速度", level=2)

            # Read the content, falling back to GBK when decoding fails.
            self._show_progress("📖", "读取文件内容...", level=1)
            try:
                content = file_path.read_text(encoding=encoding)
            except UnicodeDecodeError:
                self._show_progress("🔠", "解码失败尝试GBK编码...", level=2)
                content = file_path.read_text(encoding='gbk')

            # Analyze the content.
            self._show_progress("🧠", "开始分析...", level=1)
            success, result = self.analyze_content(content)
            if not success:
                return False, result

            # Save the result when an output path was given.
            if output_path:
                self._show_progress("💾", f"保存到: {output_path}", level=1)
                try:
                    output_path.parent.mkdir(parents=True, exist_ok=True)
                    output_path.write_text(result, encoding=encoding)
                    self._show_progress("", "保存成功", level=2)
                except Exception as e:
                    self._show_progress("", f"保存失败: {str(e)}", level=2)
                    return False, f"结果保存失败: {str(e)}"

            self._show_progress("🎉", "处理完成!", level=0)
            return True, result
        except Exception as e:
            self._show_progress("💣", f"严重错误: {str(e)}", level=1)
            return False, f"文件处理失败: {str(e)}"
def main():
    """Analyze the hard-coded transcript file and print the outcome.

    Builds a ``ContentAnalyzer``, runs ``analyze_file`` on the input path,
    writes the result to the output path and prints either the result or
    the failure message, framed by separator lines.
    """
    print("\n" + "=" * 50)
    # str.center() requires a fill string of exactly one character; an empty
    # fill raises TypeError, so the default space padding is used.
    print(" 🚀 长春云校视频课程智能打标记系统 ".center(50))
    print("=" * 50)

    analyzer = ContentAnalyzer(initial_timeout=120)  # set the initial timeout explicitly
    input_file = Path(r"D:\dsWork\QingLong\AI\音频文本.txt")
    output_file = Path(r"D:\dsWork\QingLong\AI\分析结果.txt")
    success, result = analyzer.analyze_file(input_file, output_file)

    print("\n" + "=" * 50)
    if success:
        print("✅ 分析成功!结果如下:\n")
        print(result)
    else:
        print(f"❌ 分析失败:{result}")
    print("=" * 50)
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()