From dc7189af7a69d6abcf3d60de88c5350593a4b7b6 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Thu, 10 Jul 2025 15:32:51 +0800 Subject: [PATCH] 'commit' --- dsLightRag/ShiTi/T3_DocxToMd.py | 2 +- dsLightRag/Util/DocxUtil.py | 8 +++++--- .../Util/__pycache__/DocxUtil.cpython-310.pyc | Bin 1345 -> 1365 bytes .../media/image1.png | Bin 0 -> 5807 bytes 4 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 dsLightRag/static/Images/2ddc4eb15b06461ab1d56444d012d66c/media/image1.png diff --git a/dsLightRag/ShiTi/T3_DocxToMd.py b/dsLightRag/ShiTi/T3_DocxToMd.py index b47cdae9..307e15ab 100644 --- a/dsLightRag/ShiTi/T3_DocxToMd.py +++ b/dsLightRag/ShiTi/T3_DocxToMd.py @@ -7,7 +7,7 @@ async def main(): # 要处理的文件路径 file_path = "Docx/《动能定理》巩固练习.docx" # 转换docx为md - get_docx_content_by_pandoc(file_path, '../Word转试题.md') + get_docx_content_by_pandoc(file_path, '../Word转试题.md',extract_media='../static/Images/') if __name__ == "__main__": asyncio.run(main()) diff --git a/dsLightRag/Util/DocxUtil.py b/dsLightRag/Util/DocxUtil.py index b3b711e8..d5f1784f 100644 --- a/dsLightRag/Util/DocxUtil.py +++ b/dsLightRag/Util/DocxUtil.py @@ -3,7 +3,7 @@ import subprocess import uuid -def get_docx_content_by_pandoc(docx_file, output_file=None): +def get_docx_content_by_pandoc(docx_file, output_file=None, extract_media=None): # 最后拼接的内容 content = "" # output_file 设置为临时目录下的uuid.md @@ -12,9 +12,11 @@ def get_docx_content_by_pandoc(docx_file, output_file=None): prefix = docx_file.split(".")[0].split("/")[-1] temp_markdown = os.path.join('./static/markdown/', prefix + '.md') # 调用pandoc将docx文件转换成markdown - os.mkdir("./static/Images/" + file_name) + if extract_media is None: + os.mkdir("./static/Images/" + file_name) + extract_media = "./static/Images/" subprocess.run(['pandoc', docx_file, '-f', 'docx', '-t', 'markdown', '-o', temp_markdown, - '--extract-media=./static/Images/' + file_name]) + '--extract-media=' + extract_media + file_name]) # 读取然后修改内容,输出到新的文件 img_idx = 0 # 图片索引 with open(temp_markdown, 'r', encoding='utf-8') as f: diff --git a/dsLightRag/Util/__pycache__/DocxUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/DocxUtil.cpython-310.pyc index 7071172125726cf9096ae21a5cba3c6508240c43..a7cd04bea90de30d8ec5665846fb7d2d08b75c60 100644 GIT binary patch delta 670 zcmY*XO>fgc5S`iG#B0Y+lhWX(<>Q(lMJgvQzyT1PT17|*QG}|}Nueddt>e%hc9jD{ z>IET`16TVuls~|Y9{>{cw{T$A4G4C%v+vEkU9X?!OYl92k|+v*$H_2xdGaypPo-n? z5%C`Ky31?)0|5xF%pk`C_Jvgja!H~fWB~$yAyh>G5AuonP*s2i(6G*$6 z%01FXaTXCsTaoG9C*J@CRHKlhDb-Dt7L16&$zJ%SP+BhwINzWqExwl@Mpher51u`@ zzVY)~0d61n4<9fK=|X|&Q$`UjIaFrr)N&7fRY~4(rA`-1rVh?o{FhB#arvCuzy#^1 zQr<4m){IYJYP1r=JHh`MnlN9WRdzry_Gsk>SkFik(xn&}Oa8Fr)BHTvT_!BVpNf&3 zx#BWT^%w{g&$klc{%d_5#-ESa9yg!xGRWp(`jZ_%x;l3tat`DTBb`70UQnL@HfxBs zS9LWbXKUi9+358K$630)ll4Xe+S|T$`c=&H2IFm1WudMI_3wDW)H ZGCk8bM+r@agWKF(FxDJ2Xw+DjzX1~^nVSFr delta 624 zcmZWmKTi}v6n}4a_W$j{A$Tk(VnN`F<6jH_hnQ=lDYh@gTp zq)-Dz6jO~7s%w{Kth*mhYyvsPQxS`^)S$U9&iT;z+J6df_e`LJGOobx`LmA?nbqh< zh1m;63C&wHRqM21A9m=Dj@hZx%_);7m#le>EnYGc>M)y}Pt(&1O{4cc1sf6@tdXr>4;#gnyYd$y2ak9F diff --git a/dsLightRag/static/Images/2ddc4eb15b06461ab1d56444d012d66c/media/image1.png b/dsLightRag/static/Images/2ddc4eb15b06461ab1d56444d012d66c/media/image1.png new file mode 100644 index 0000000000000000000000000000000000000000..9e0e4c51a244d3e90b82d6ff1dee938f376207aa GIT binary patch literal 5807 zcmd^DcUMzSv<+RF2%$+41SA0|ia}bWcStT>iXb3L6NA)%6oV8&kt$L{5l8?9#o^z&Y9U~pR?xPi8Vp$u`=;7fj}Tu1AT2X5QrK8 zfv5^E&`=N|pPSDqFDhR%Jxx$~uKS7g98i(b8>QWadB~TbMx@< z@bdCrzI>UFkB^_9UqC=WP*Cv7l`BF*Lc+qrSFc_b5fKp;6%`W`yLRoGxVSh30)axI z5)u-Ul9Jc2Uzd`Sl9ra1k&%&=mA!G}#?6~I<>cgU-MR&X!Q|!TZ{NPHprD|rsHmi* zq^zv0qM`za!|&X=qpGT^rlzK@uCAe>p{c2Cc{uWw*rU}$J) zWMpJ)Y>Yr4kVqr|0463TrlzK5W@hH*<`xzfmX?-QR#tcK-nF*2wz09XwY9agv%7ci zp1r-jgM-8U`}Z9k9i5zVPkii(Pkj*f|mdHM2XY;0^?T->WyuU@}?9UmY6=FOXg zgoMPz#H6I8lw+7Zen{fB(L)u&}78sJOTohr{9V_>z*6($dnhva$~!K79Q6vAn#zqN3u{r%#_h zf3B>o{O`a2s;a80tE+2jYQB8=Qd?VFS65eGU*FKs(Ae16)YR16+}zUA(%RbE*4Fm* z>(_7JzJ34xy}iA?qobpgw9s+WPwX#>NJjOy1nw+}hgO-rnBX+1cIQ z-P_yS-`_ttI5<2!JUTi$K0ZD~$y=U$U?82&oMiCh<=ZEjq zppLj!wE***V5SphK;5wf1h*+A+#}P{q%pOo6lMOP18)4^U==ORIqjL>C@Nd3*5%&TJu?e_wcco9?dO48d}}_*K)PlJo!- z6z#$1+()3q9d5Z@z=M91wRT|=Beqmjse|?^RvCUV8h6thzb9g{9z0D8{7Mpd5CH+~y(9ji zyR%dPZur(r$?fC^ug_Y84ukd^$?!XstI3lrm zYa)h#!TIPgT+aAO3{5kM5I%9BXq5|;^3Djh~9Y~$?ThTe=~mjZ*K@6>tJp(CGBTPd>C_EttU z)`wghy8=pM>TTxlE;PtwHs^ICO)99!^L(xO+(yJ5A9)3h)ALN`Lw7vyvC{imM3=G2 zISzNg-!Y`pGICT=*kIgcXcDg24(5a(>WzA~4=Js#YFR}@Kq|PfkN%Ds=?YM5IYorNyJeriFG&9Lj=hez z#XE9E;sy#(8^C)Co4x$Z2OZ*!lH-Lbxr^!r-96$IwMw1kcBeh^uwd==OR5THcdHer z5D4G^YQ;VT68p@-1eE228-k?HV%Xjok+*-4tLPL45Zqtp84w9PIbS`a__pGPY6FKl zCPLjA{J)a4bwX~hOw_WZ2V14)HNz&tr{cPZ@}fz(##&0>UFaBB^oVb9+rub^y8Frv zFgq>ib8s<}3yr^oJ#L3v2Gd#bmr+4M5B<@U^Cw4B*K# z@?w{rHsHY?exQ{aR4{7)6NPgo(F>&`P1<6vZ3rgi(kDEI>~q``bBn+sXZXTK-6G=N zDQ^+UrfbWNiW-ctq+Dg)$E!tJjFLWj-jf$ji6c&HOA|CuI>bCKY?Ci+;kt=r=Cu0( zKERL6k~*RodQv#)&oZ)XB;@VpE}|BHWw87#lp_@@j$YxbnBh^&EFL~z&MulH`Hc}* zzNeUh&}K~4I2&ZiTv!O8yVKBH9D8OUyB?562Wuk+q~aCsMS?fV@VgT|pI(b^5gk(T zfmtD)67tMTD%Bmwyxc_XurUUXU8Iqb;69y4tb*K!g4mNPc8Sc7l2eAUfX;bgQV=-H zBeM(0N-P*Hrb3Vl*@Kz+!IV8STg@Z*255oGAr)(jD=SEjfrO1#qu_)93h6vTvN(DS zi;Hu;esv8WXa|0nmF-~mmDmB#AkqT-#GNw+LbY0L0cWX)=0aaT)V}S9C5E058@$BV zmc;F;1>Sv9$g6TRSTJ;22Q>MiOMDY2()RNivE$TZ{`reu2cK1r zHp?*qHscB6TPa5B4KlL^V7NbP^|#q(SX+vg{xg}GU(rOh-Pe`FW9t+`jhcU)axTK! z%wrT)q(@h|aE0gNJc?1nK^ZcH*rmW;(>>dA9>kDJ#f64)B5vr^D~UqIJ)!;R8U(%d zvSNeJo}%Und!+k>Y7S{zvK- zl0R%UVcR~-MCFIFFk&7OQ>$R>Tx-^OSZ-Q)r&utBLRW8fZJ771IJz>y|M+re5hHd? zZz?$I>L2gkz~4>*nW)DUwUNBm(zkp{=0aS#9##ZJ<6=+dqaxaso2FQYS24#=;|)be zeyD>F-D%BWs>UWO%egR&z_&+l=643;f)(g*7|1>{pvXmKywP{^w-46Lt)kq?{pyQC zM{9;#!DM`!@%XD)L&4L7VOYRWRYc*#NG+AO`!B$CvhiXC1P+OVgITJf|8n0H(OE%sZ{u|gvRvxZKTlU z8A1*&%DrH+jtKjO679MhbCOzN^Ydj<3b@bCqaVuKpo|!5poYf9d;q|3u5U?d-VE93 zE7^TIqc5c?LcVQFQu9a}4r-70Z3bVHyD<_)(TGZFmNP50UQMtI5V=@Qe=G(;mp;_F z(qirt0m+CleDyII47VHKZ3-*%-ld(tt@wXI{h>vDLhqsqDQP(Q^%IDpPCmtYWOA-?{cF{ml!j{_`aG;1O0whm#{17vD34y@+JeBe|kc;_zEtPDdvN7k+pr| z;^=`fUEYCs##?8{}{A&lofV`KUoK z9Wdx63nRJDF1@I*wNLQ2s250{teM9xg)S_aJ?D=I$(|8OzM_f#V{UTevlVnXKLPpBNK%nh`6-b)eclk9N(MJS1(KoD3w{;Txs|$UDcT zFPP1a^BpVa)+k?0oEEXp9BJ;5lH@0t;|MlnLV<#px{6A(0x&#m7FJT~Ya;Ft=8DD2 zDmAspJymRIa44D@?>;xQ--mvfYpsFbNg*n0tktFL zm4UYe3GZXh+L>&bl{Eo}ESyKC*}@YnQC}tG$}QQSDH`^1x$CVG==pF)tJkQ*uGwq1 z==sDT-S*Lw;l(|R5O8J${IW~aIP~X(7AT+#YZt2d0d~3%sF3|Wev z0SyoP1FiReA62nTB>GxLS)}Be9o>}5PKZ+(t?=2MR=WWs)(Z#7rLimjCe z1rPdFc#~5nXnwJ{*=~qsJAAI#yt5lHcoQWL1hTawetEfX4TJ`jf@OI}Rn!*XI;|L- z!*2Pfc`uCxrGncYb8k?c<_)@{Ne5S^B!OfDYzM2GYv1CW1i{@C+r7Ce;99Yewe8ZB zWItWT$M3ahY!Kk&FwfY72KSPN6!?nKu`dTDZTQFY0Y6w7ITsMxE3J z@ZDqwQCM>e4XX;#6zS6TZ=i8?rJ|EF$Fcn+YiX3kaD0(n%th<~G$iM%`z9vQ`Ln2|O z#P~sj>HF=SAHF5D{!6iYgg2ZM#xJ%xsYPad(jhq<);7=^Fkohf8sNsDV4YoXol?t! z>7-mfrvhw^JU|CHSFkBktHd6M%#jALWe?jvnP48lEw7Nx_F4b^KSc4Sv;OTgU$L={ zD<1y+9(g}#*3UIMHn<=F&d6xkxDXb^{YM9;5rsB0!sKJ9o6sDN?Bj+Ie1%jMK>yqY zIgp6cj)+^rqKb_6Hzl#H5-D`d+i7qeOx$eYdxK$hObVS)oiA)bf+e*d0XZsu;QKYc zE!X~pGrf#`FPLs@%dhc9(ABGEJ&{@yK>#&SLDz*0rk9fD-{uUVQCJP{C8;Jody`y3 z@J%z68~!56c|E`WM3uq1`+6xO-IwY+)|=&*(@paH!-8d{I~2dbE$dh>t#RU+UMuHZ zI9p1s7s^}Hc3O0%JK7doEN`7uY~W&O+-#kMzwNg;G6`<$U4% zZNp@tQ(O}+X@tx@Q}xW^RgvgV!o5E3Y2dYVP?2a@lCw-QSil`ugTwWxN}4bbUI}yI z>_&=A2;DX?i?w88pLwGo>pk7*4T$KMH2V9u(?#}Fjeif>bR=>8x9A4(OXFu(j9X`J zC7IlkrKgwTHFnbQml{qb(;}x`y1%7VqEyup!1*2MF`iwNyBW||4u$N`xmLO!=NoCOh(F7JzVXL6;${-2&mMx zUgmlZX|l5Jrrfi9VCN2|n>MK6N~@$Mi+M&yIZ&YoJ)?>~fZzl^r~JDh?2fII#Q$5j bb1MEfV$Vv7ja@01U?2k>q;|QcW5oXeDv