You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

173 lines
6.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
from typing import Optional, Tuple, Iterator
from openai import OpenAI, APIError, APITimeoutError
import time
import httpx
from pathlib import Path
from Config.Config import LLM_API_KEY, LLM_BASE_URL, LLM_MODEL_NAME
class ContentAnalyzer:
    """Course-content analyzer that streams the model's reply.

    Wraps an OpenAI-compatible chat-completions client: the text to analyse
    is inserted into a prompt template, sent to the configured model with
    ``stream=True``, and the reply is surfaced piece by piece.  Progress
    messages are printed to stdout.
    """

    # Default prompt; ``{}`` is replaced by the text being analysed.
    _DEFAULT_PROMPT_TEMPLATE = "帮我梳理:这节课分了几个部分,每部分的名称和开始的时间是多少:{}"

    def __init__(
        self,
        api_key: str = LLM_API_KEY,
        base_url: str = LLM_BASE_URL,
        model: str = LLM_MODEL_NAME,
        max_retries: int = 10,
        initial_timeout: int = 300
    ) -> None:
        """Create the API client and run a connectivity probe.

        Args:
            api_key: Key passed to the ``OpenAI`` client.
            base_url: Base URL passed to the ``OpenAI`` client.
            model: Model name used for every completion request.
            max_retries: Number of retries after a timed-out request, so up
                to ``max_retries + 1`` requests are made in total.
            initial_timeout: Timeout in seconds for the first attempt; each
                later attempt adds 5 seconds.

        Raises:
            Exception: Whatever the connectivity probe raised; it is logged
                and re-raised by ``_check_network``.
        """
        self._show_progress("🔧", "初始化分析器...", level=0)
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.model = model
        self.max_retries = max_retries
        self.initial_timeout = initial_timeout
        self._check_network()
        self._show_progress("", "分析器准备就绪", level=0)

    def _show_progress(self, emoji: str, message: str, level: int = 1) -> None:
        """Print one progress line: indent, emoji, ``[HH:MM:SS]``, message.

        Args:
            emoji: Marker printed in front of the timestamp (may be empty).
            message: Text to display.
            level: Nesting depth; one space of indent per level.
        """
        indent = " " * level
        timestamp = time.strftime("%H:%M:%S")
        print(f"{indent}{emoji} [{timestamp}] {message}")

    def _check_network(self) -> None:
        """Probe network connectivity with a single HTTP GET.

        The probe only checks that the request completes within 30 seconds;
        the response itself is not inspected.

        NOTE(review): the probed host is hard-coded and does not follow the
        ``base_url`` given to the constructor -- confirm this is intended.

        Raises:
            Exception: Any error raised by the request, after logging it.
        """
        try:
            with httpx.Client(timeout=30) as client:
                client.get("https://dashscope.aliyuncs.com")
            self._show_progress("🌐", "网络连接正常", level=1)
        except Exception as e:
            self._show_progress("", f"网络异常: {str(e)}", level=1)
            raise

    def _retry_delay(self, attempt: int) -> int:
        """Return the exponential back-off delay in seconds for *attempt*.

        The delay is ``2 ** attempt`` capped at 60 seconds.
        """
        return min(2 ** attempt, 60)

    def _stream_deltas(
        self,
        content: str,
        prompt_template: str
    ) -> Iterator[Tuple[bool, str]]:
        """Yield the reply's text deltas only, without a trailing aggregate.

        Yields ``(True, delta)`` for every non-empty content delta.  On
        failure yields a single ``(False, message)`` and stops.  A timed-out
        request is retried with back-off, but only while nothing has been
        yielded yet: restarting after partial output would make the consumer
        receive the beginning of the reply twice.
        """
        total_attempts = self.max_retries + 1
        for attempt in range(total_attempts):
            emitted = False
            try:
                current_timeout = self.initial_timeout + attempt * 5
                # Show the real number of attempts so the counter never
                # reads e.g. "11/10".
                self._show_progress(
                    "⏱️",
                    f"尝试 {attempt + 1}/{total_attempts} (超时: {current_timeout}s)",
                    level=2
                )
                # Formatting stays inside the try so that a malformed
                # template is reported as a (False, message) item.
                full_prompt = prompt_template.format(content)
                stream = self.client.chat.completions.create(
                    model=self.model,
                    messages=[{'role': 'user', 'content': full_prompt}],
                    timeout=current_timeout,
                    stream=True
                )
                for chunk in stream:
                    # Skip chunks that carry no choices or no text.
                    if chunk.choices and chunk.choices[0].delta.content:
                        emitted = True
                        yield True, chunk.choices[0].delta.content
                return
            except APITimeoutError:
                if emitted:
                    # Already-yielded text cannot be taken back, so a retry
                    # would duplicate it; report the failure instead.
                    yield False, "流式响应中途超时,已输出部分内容,无法重试"
                    return
                if attempt < self.max_retries:
                    delay = self._retry_delay(attempt)
                    self._show_progress("", f"{delay}s后重试...", level=2)
                    time.sleep(delay)
                else:
                    yield False, f"API请求超时已重试{self.max_retries}"
                    return
            except APIError as e:
                yield False, f"API错误: {str(e)}"
                return
            except Exception as e:
                yield False, f"未处理的异常: {str(e)}"
                return

    def analyze_content_stream(
        self,
        content: str,
        prompt_template: str = _DEFAULT_PROMPT_TEMPLATE
    ) -> Iterator[Tuple[bool, str]]:
        """Stream the analysis of *content*.

        Args:
            content: Text inserted into ``prompt_template``.
            prompt_template: ``str.format`` template with one ``{}`` slot.

        Yields:
            ``(True, delta)`` for each piece of the reply as it arrives,
            followed by one final ``(True, full_text)`` item holding the
            concatenation of all deltas (omitted when the reply was empty).
            On failure a single ``(False, error_message)`` item is yielded
            and the stream ends without the aggregate.

            Consumers that accumulate the deltas themselves must not also
            append the final aggregate item.
        """
        pieces = []
        for ok, text in self._stream_deltas(content, prompt_template):
            yield ok, text
            if not ok:
                return
            pieces.append(text)
        if pieces:
            yield True, ''.join(pieces)

    def analyze_file(
        self,
        file_path: Path,
        output_path: Optional[Path] = None,
        encoding: str = 'utf-8'
    ) -> Tuple[bool, str]:
        """Read a file, stream its analysis to stdout and optionally save it.

        Args:
            file_path: File whose text is analysed.
            output_path: Where to write the result; parent directories are
                created.  Nothing is written when this is ``None``.
            encoding: Encoding tried first when reading, and used when
                writing.  If decoding fails, GBK is tried once.

        Returns:
            ``(True, result_text)`` on success, otherwise
            ``(False, error_message)``.  Exceptions derived from
            ``Exception`` are caught and reported through the return value.
        """
        try:
            self._show_progress("📂", f"开始处理文件: {file_path}", level=0)

            # Validate the input file.
            self._show_progress("🔍", "验证文件...", level=1)
            if not file_path.exists():
                self._show_progress("", "文件不存在", level=2)
                return False, f"文件不存在: {file_path}"
            if file_path.stat().st_size > 10 * 1024 * 1024:
                # Files above 10 MiB are processed anyway, with a warning.
                self._show_progress("⚠️", "注意:大文件可能影响处理速度", level=2)

            # Read the text, falling back to GBK when decoding fails.
            self._show_progress("📖", "读取文件内容...", level=1)
            try:
                content = file_path.read_text(encoding=encoding)
            except UnicodeDecodeError:
                self._show_progress("🔠", "解码失败尝试GBK编码...", level=2)
                content = file_path.read_text(encoding='gbk')

            # Stream the analysis.  Only the deltas are consumed here: the
            # public generator ends with the complete text, and collecting
            # that too would print and store the whole reply twice.
            self._show_progress("🧠", "开始流式分析...", level=1)
            result_buffer = []
            for status, chunk in self._stream_deltas(
                content, self._DEFAULT_PROMPT_TEMPLATE
            ):
                if not status:
                    self._show_progress("", f"分析失败: {chunk}", level=1)
                    return False, chunk
                print(chunk, end='', flush=True)  # live output
                result_buffer.append(chunk)
            final_result = ''.join(result_buffer)

            # Persist the result when a destination was given.
            if output_path:
                self._show_progress("💾", f"保存到: {output_path}", level=1)
                try:
                    output_path.parent.mkdir(parents=True, exist_ok=True)
                    output_path.write_text(final_result, encoding=encoding)
                    self._show_progress("", "保存成功", level=2)
                except Exception as e:
                    self._show_progress("", f"保存失败: {str(e)}", level=2)
                    return False, f"结果保存失败: {str(e)}"

            self._show_progress("🎉", "处理完成!", level=0)
            return True, final_result
        except Exception as e:
            self._show_progress("💣", f"严重错误: {str(e)}", level=1)
            return False, f"文件处理失败: {str(e)}"
def analyzer_action(input_file, output_file):
    """Analyse one file and print a framed report to the console.

    Builds a ``ContentAnalyzer`` with a 300-second initial timeout, runs
    ``analyze_file`` on the given paths and prints either a success line
    naming the output file or the returned error message.

    Args:
        input_file: Path of the file to analyse; passed to ``Path``.
        output_file: Path the result is written to; passed to ``Path``.
    """
    rule = "=" * 50
    print("\n" + rule)
    # str.center() needs a fill of exactly one character; an empty fill
    # string raises TypeError, so rely on the default space padding.
    print(" 🚀 长春云校视频课程智能打标记系统 ".center(50))
    print(rule)

    analyzer = ContentAnalyzer(initial_timeout=300)
    success, result = analyzer.analyze_file(Path(input_file), Path(output_file))

    print("\n" + rule)
    if success:
        print("\n✅ 分析成功!结果已保存至:", output_file)
    else:
        print(f"\n❌ 分析失败:{result}")
    print(rule)