From 6649c9d74386aa48cbc0cec1a0741266f2e7fefe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com> Date: Mon, 17 Feb 2025 08:09:06 +0800 Subject: [PATCH] 'commit' --- AI/T3_TextSummarize.py | 105 +++++++++++------- .../T3_TextSummarize.cpython-310.pyc | Bin 5470 -> 5856 bytes 2 files changed, 62 insertions(+), 43 deletions(-) diff --git a/AI/T3_TextSummarize.py b/AI/T3_TextSummarize.py index fc559a23..b48ad58c 100644 --- a/AI/T3_TextSummarize.py +++ b/AI/T3_TextSummarize.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- from pathlib import Path -from typing import Optional, Tuple +from typing import Optional, Tuple, Iterator from openai import OpenAI, APIError, APITimeoutError import time import httpx class ContentAnalyzer: - """课程内容分析器(长超时版)""" + """课程内容分析器(流式版本)""" def __init__( self, @@ -15,7 +15,7 @@ class ContentAnalyzer: base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1", model: str = "deepseek-r1", max_retries: int = 10, - initial_timeout: int = 300 # 初始超时改为300秒 + initial_timeout: int = 300 ): self._show_progress("🔧", "初始化分析器...", level=0) self.client = OpenAI(api_key=api_key, base_url=base_url) @@ -32,56 +32,64 @@ class ContentAnalyzer: def _check_network(self): try: - with httpx.Client(timeout=30) as client: # 网络检查超时延长到30秒 + with httpx.Client(timeout=30) as client: client.get("https://dashscope.aliyuncs.com") self._show_progress("🌐", "网络连接正常", level=1) except Exception as e: self._show_progress("❌", f"网络异常: {str(e)}", level=1) raise - def analyze_content( + def _retry_delay(self, attempt: int) -> int: + """指数退避延迟""" + return min(2 ** attempt, 60) # 最大延迟60秒 + + def analyze_content_stream( self, content: str, prompt_template: str = "帮我梳理:这节课分了几个部分,每部分的名称和开始的时间是多少:{}" - ) -> Tuple[bool, str]: + ) -> Iterator[Tuple[bool, str]]: + """流式分析内容""" for attempt in range(self.max_retries + 1): try: - # 动态计算超时时间:60s, 65s, 70s... current_timeout = self.initial_timeout + attempt * 5 - self._show_progress("⏱️", f"尝试 {attempt + 1}/{self.max_retries} (超时: {current_timeout}s)") + self._show_progress("⏱️", f"尝试 {attempt + 1}/{self.max_retries} (超时: {current_timeout}s)", level=2) full_prompt = prompt_template.format(content) - completion = self.client.chat.completions.create( + stream = self.client.chat.completions.create( model=self.model, messages=[{'role': 'user', 'content': full_prompt}], - timeout=current_timeout + timeout=current_timeout, + stream=True # 启用流式模式 ) - if not completion.choices: - self._show_progress("⚠️", "API返回空响应", level=2) - return False, "API响应中未包含有效结果" + buffer = [] + for chunk in stream: + if chunk.choices and chunk.choices[0].delta.content: + content_chunk = chunk.choices[0].delta.content + buffer.append(content_chunk) + yield True, content_chunk # 实时返回每个片段 - self._show_progress("✅", "请求成功", level=2) - return True, completion.choices[0].message.content + # 返回完整结果 + if buffer: + yield True, ''.join(buffer) + return - except APITimeoutError: + except APITimeoutError as e: if attempt < self.max_retries: - delay = 2 ** attempt + delay = self._retry_delay(attempt) self._show_progress("⏳", f"{delay}s后重试...", level=2) time.sleep(delay) else: - self._show_progress("❌", "超过最大重试次数", level=2) - return False, f"API请求超时,已重试{self.max_retries}次" + yield False, f"API请求超时,已重试{self.max_retries}次" + return except APIError as e: - self._show_progress("🚨", f"API错误: {str(e)}", level=2) - return False, f"API调用失败: {str(e)}" + yield False, f"API错误: {str(e)}" + return except Exception as e: - self._show_progress("💥", f"意外错误: {str(e)}", level=2) - return False, f"未处理的异常: {str(e)}" - - return False, "未知错误" + yield False, f"未处理的异常: {str(e)}" + return def analyze_file( self, @@ -89,16 +97,16 @@ class ContentAnalyzer: output_path: Optional[Path] = None, encoding: str = 'utf-8' ) -> Tuple[bool, str]: - """处理文件全流程""" + """处理文件全流程(流式版本)""" try: self._show_progress("📂", f"开始处理文件: {file_path}", level=0) - # 检查文件 + # 文件验证 self._show_progress("🔍", "验证文件...", level=1) if not file_path.exists(): self._show_progress("❌", "文件不存在", level=2) return False, f"文件不存在: {file_path}" - if file_path.stat().st_size > 10 * 1024 * 1024: # 10MB警告 + if file_path.stat().st_size > 10 * 1024 * 1024: self._show_progress("⚠️", "注意:大文件可能影响处理速度", level=2) # 读取内容 @@ -109,45 +117,56 @@ class ContentAnalyzer: self._show_progress("🔠", "解码失败,尝试GBK编码...", level=2) content = file_path.read_text(encoding='gbk') - # 分析内容 - self._show_progress("🧠", "开始分析...", level=1) - success, result = self.analyze_content(content) + # 流式分析 + self._show_progress("🧠", "开始流式分析...", level=1) + result_buffer = [] + has_error = False + error_msg = "" + + for status, chunk in self.analyze_content_stream(content): + if not status: + has_error = True + error_msg = chunk + break + print(chunk, end='', flush=True) # 实时输出 + result_buffer.append(chunk) - if not success: - return False, result + if has_error: + self._show_progress("❌", f"分析失败: {error_msg}", level=1) + return False, error_msg + + final_result = ''.join(result_buffer) # 保存结果 if output_path: self._show_progress("💾", f"保存到: {output_path}", level=1) try: output_path.parent.mkdir(parents=True, exist_ok=True) - output_path.write_text(result, encoding=encoding) + output_path.write_text(final_result, encoding=encoding) self._show_progress("✅", "保存成功", level=2) except Exception as e: self._show_progress("❌", f"保存失败: {str(e)}", level=2) return False, f"结果保存失败: {str(e)}" self._show_progress("🎉", "处理完成!", level=0) - return True, result + return True, final_result except Exception as e: self._show_progress("💣", f"严重错误: {str(e)}", level=1) return False, f"文件处理失败: {str(e)}" -def analyzer_action(input_file,output_file): +def analyzer_action(input_file, output_file): print("\n" + "=" * 50) print(" 🚀 长春云校视频课程智能打标记系统 ".center(50, "✨")) print("=" * 50) - analyzer = ContentAnalyzer(initial_timeout=300) # 显式设置初始超时 - success, result = analyzer.analyze_file(input_file, output_file) + analyzer = ContentAnalyzer(initial_timeout=300) + success, result = analyzer.analyze_file(Path(input_file), Path(output_file)) print("\n" + "=" * 50) if success: - print("✅ 分析成功!结果如下:\n") - print(result) + print("\n✅ 分析成功!结果已保存至:", output_file) else: - print(f"❌ 分析失败:{result}") - print("=" * 50) - + print(f"\n❌ 分析失败:{result}") + print("=" * 50) \ No newline at end of file diff --git a/AI/__pycache__/T3_TextSummarize.cpython-310.pyc b/AI/__pycache__/T3_TextSummarize.cpython-310.pyc index 2df9897cad9eda49c3d6fab55ef19b490e50fa7b..1c4878154d09d0551d3b3d2743a38c5ba15926a3 100644 GIT binary patch delta 3078 zcma)8YiwLc6`q-U_rCUJuQ#?gekOq=>k!i@35lYDh|*Gp(l#O~QCE=dX6LTG&OV&E zcN=Wo%{n*__hI62DMBS~9I1#_rL;6qh+;M3A42><@QW)zCCWN6{0Q(1L=th%y>{(H z{lTv8nVB=^%sDf2zH{a;10QXTRijaZ!0%V}_lI_NU5ItlBNw*xG|Hh&h!Re?R8q=n zMlEX@t*mEspC^}$i~)0UDVzyEL3oHOrwLakSti0&uAL^CD3=eCw7!Tdr>Q3&$l1eb z$UujQM0mnrjZ_&Q9(44V;CF0d3yk%Gc|c<<>@CaU-8 z{XEUrnK8cJjGM6(;TwGUkEIJ5{}lf;$hPo4@O!|E@2gg9vtsWBU&qExQQJg_`wmS- z7Jh!JIe-4vi&G01e|+Wl^hv73VT+>0-5~8gpg`AC@}(kZidN(&khCE?mdy{F`D59N zX`iTyV`;^cEwfY*fjWvksPGmMUGCdbgRXJ6$Wy!4qC_8(ek5yvcuKLt!IG!j#jy!!bzWWW3S`N-{g%{>BWB1ZC7^g=s-pN@o(l$|0X=ivf=E7zkDaoQS;$NQf@=} zV#8B&wr!S2ZBG%VT@#f&1M%e__9giC!Dq#Q*o0Guwh_)I8Jwt8!;@He<=n!n?=4PD zES|dFy!OHE>z5W$aq&?g5G7K%SV^_xI*T8w46S;~zK0LMc07#r_? z=(+}13%b{plaJ@oyX9X5I$`+o@L9j3gq#9@9Wp{W?I4r%G4hQ+*QN2Jz+sNWS(0>+ zx(p+CO6^m)#`S5*eM{|X7*2?XFR~%($UMTM({f!IB6ZbKV1=H#=4c~^ID(Wz%#DOc zA`g!0x}(lT9sM!|R7D)Uz`jN(Ilk9XMG@AE&&3?|G7Y$gVTQLHBr>t%V{cPZHz;Y0 zZ+9eg`xp`D9WvMANSA3n3>=BKI^iD>o|t8UyOh|ia|tJmrYZ^E#*?TA5^H9qWdqNF zfx2-L7Dhvd$@2KP7Vu9{Fy1!y>%jPUf;ZZDyPb55Nmyu>eeyD^RM(6(KUQty9sjT9 z3E!P`XAnU?{F+l|7ha!fs%OtMe{xmyy1VtAN5nR`OpyfQ$u-LqY39kIS~A7wfORM> z`|yGsW?EKm$g~6i8W050Z|2GZLV$SW=^6&fjq_J(QE08^%(oZMP2Ya^)xp6*cb_q! zV#q*la^E%9uWR>0B3BtQR}e0IXeouNDCca^=_=vAqX=nF%@0?LdDHSj5Vv;DcgRz6 zqmb>mCy!K%6`_Hg*nwmnYJ{wkX^sjN8C#Kj21q&*AOrU!b|FEGxG#iz8v+63DMxFC zf+@UEez;aSmWk$r6b5+*hGGy!(1YCAYFr_=Jr`uT?6O91DyU(76$qP1D2&GF05zCF zWol3cf90;MYBUM_9R{){WT}N}GzKCDZKn~MWGysuPn(E^k`!hpVk~Icec2n|=%yll zYBsRs?vCtgLr`KR@>9qG0QUHO_gj(QHULq8vbw@M0a;{DapcR0(9W;3x&{c+tdl%d zR~)^rLTE>A)zPs!@U_ztB~EBg3-EPZIEt8speF!`E=R?Pg<2(}6c0OUfgL6X3GbZK zoe+S?aG3a)FWu>+F~l4#2Z4%fo#A3KyMfC5JY(~`MvUveYgw6gHZ&iUpQE^4n>gVR7;+rwxqoyH1Cns1!GHFI|1d*^Om z`=EK|O(=%9-hBDa{M72IcxV1acP6%(Cf%#CbwTOb?0yvMq}$y8#QMTDERr`~o4fPN z^KMW4S^AV~#rHmk^{hER(frZN!BPRj0eIr`FmnjNEFoG#g;LELu3>>$$^mcu^!Cqw zxTn7+1M44`zVl%XL3;E0Z1bJ(H>cj))9?N@-cH-yf5*Qo#YKmEzGY9#evl9wkYK2Z z$K2nv^feG+KGv3D_9QaCg5)VAh`LNvm{zT1X9J*R^x>S9HHD}OPxqT_*&4!}lCx`; zcmg!MNTCSWf@*|%J_a8KU&IGrbll7qiY4<1C|S?DUuxaZEd$bInqWaI1I{8;X6*nj zgSpk#wY0^3sdeBHgp9yV3j~nB%q8%A(o|sChh`@>>}m03Y+K!%t%J{FQt_lByi$6g z=VvaF!k(XlLfFFt_2i?~YDxG9hLu)eeiZ2As$HzSHguT4&pO}^C$175Om+CUUB_^S8 zIg;24eGTTA`$*d#8cX}c_f;(o#JF+sOn(p<3sW=AmoD9zKXvPxyYTwO<}cn4g5vhM zD|hB!t6`lmZp^;2B;EAG2qw}YKQiDE2M;T_Q`}ClVcdT@=yOPtjL1QIj+HB8kKX+%2y E2e%gzhX4Qo delta 2693 zcmZuzTWlN06`h%Va!HXANl_B@pd=@XDLZoe;kqyi*LH%YO#oY|-56$D2sCG9Q{qFN zT|$wtGL6Wtm4^_kBd3i`Ka|>y(=@Qt6snLI^-ufPq6zx9D1ssoDG4axezXOO)0cke+rAL&!l{LA-N!JZh)Ovnucp+z zmeTTiO80zn&X@ARoSZXK#xsa2R6UKTI!jW1s!{zkN(HEV2qk?RB5M-6@^IQdktD8u zaKg?O3+bGz94<}d%!QS`fr+R zXuvy&#rdD5GLG_t^4zWtf#?w?AxspEt7Z!nD!D#8n>Q^xou6QFG3l93F^YW-ODT5$>&^^nRba4G7=y+FCPs2a6bjN^)W`s zMh+Uo6t|&S{51OEr!&&@cfms(iIOO4Lo>32uJRE*QC=b#IpnJB;3Yhx07e;8$FwmW zMn0-L3iVaUaqPhNo|k4+M;$Y)Z^N8K{o=dR;Jk#<5d?i0IwWdJw3)VS`e|6;X`};< z`NsmI~GGM99iaC?*fKQv0ynamS&{IY)zKo$r)}282mOiX z1%q8JbE25dzzg$;D&8g2gy|KNij_0X2_{}6+b+yDFi9Wt2`^5HQw$Fkyw@AQ`|0F|dpc zOt5-SRhzMnn@KwkfLF&c9DrYOEZ++%aomsfQ$hG8o+|lVmZuZk2_*Pifn5GO%2AP*h<(RfDFhZ5Ak@QQCaYLy2Bwt z13;HSndd!)81p$Qs7KiQC8D}$H#O?`BdCxBMi^7S2S#WR-|gkqX}QPtJK8Mp5=eF* z48oCUcpp%rcSbv@iDgl5B>3PZ0kx-6y$p?SF5N5kpZmqSJlq!gSR*SOd)`L2L$Svj z?njF~Tq-$GYUO)E?fgV&D~|AmP+!8+XRpUY@M3FkoL&8Bd39yJ`rgl~l^bjq|0I-9 z*wdLQ&kYR?ZFcnUJOZrVxg-+I-P+xI7^tmH zq!~1Yr}62 zlX|^f_d)>MCyZz^Pc>Z4DrKPeEpc>MrJVhxjT+c&)X{9td;wyuXJKpS!hLEG%g~Md zPvJNY^7ZhbEKVEV5{b4aF%xeI9z)_m4zO>EXNd2Q480(j-E&w=WnaA*Y{RdabwqrRxF)6G@r%!wmf6fDdajCKknp`CnVN z#@GQdJ}69MlbyUT79Ds6oR$zTpg$1bAYh3sMj#Ra%)Sp($tHd@)_W|a7AMR?I?Ke) v>Z0{P%Gg%n^Gb?{hsr#7m#+See36!N<{qKsmQY6!861ULIu1&x93cM(`yQFr