From a8d60689bcb02ee1bf320f6c42db086505787294 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com> Date: Fri, 8 Nov 2024 19:50:15 +0800 Subject: [PATCH] 'commit' --- BaiHu/Tools/DocxTuBiaoAdd.py | 43 +++++++++++++++++++++++++++++++ BaiHu/Tools/YunNan.py | 24 +++++++++++++++-- BaiHu/Tools/汇总的内容.xlsx | Bin 0 -> 5467 bytes 3 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 BaiHu/Tools/DocxTuBiaoAdd.py create mode 100644 BaiHu/Tools/汇总的内容.xlsx diff --git a/BaiHu/Tools/DocxTuBiaoAdd.py b/BaiHu/Tools/DocxTuBiaoAdd.py new file mode 100644 index 00000000..b0c40367 --- /dev/null +++ b/BaiHu/Tools/DocxTuBiaoAdd.py @@ -0,0 +1,43 @@ +# pip install pywin32 +import win32com +from win32com.client import Dispatch + +docApp = win32com.client.Dispatch('Word.Application') +docApp.Visible = True +docApp.DisplayAlerts = 0 +doc = docApp.Documents.Open('c:/1.docx') + +# 创建图表,图表的插入位置为预先在word文档中插入的书签,书签名为“插入图表位置” +shape_chart = doc.Shapes.AddChart2(Style=201, Type=51, Top=doc.Bookmarks("插入图表位置").Select()) +shape_chart.WrapFormat.Type = 7 # 设置图表为嵌入型 + +# 设置Word中的图表 +chart = shape_chart.Chart +worksheet = chart.ChartData.Workbook.Worksheets(1) # 图表数据对应的工作表 +chart.SetSourceData("Sheet1!$A$1:$C$4") # 设置数据源范围 + +# 簇状柱形图测试数据 +chart_data = [["", "系列A", "系列B", "系列C", "系列D"], + [2020, 2, 4, 2, 3], + [2019, 4, 5, 3, 2]] + +# 清空工作表默认数据 +worksheet.Range("A1:D5").value = None + +# 填入测试数据 +for row_index, row in enumerate(chart_data): + for column_index, value in enumerate(row): + worksheet.Cells(row_index + 1, column_index + 1).Value = value + +chart.SetSourceData("Sheet1!$A$1:$E$3") # 设置数据源范围 + +# 设置图表样式示例 +chart.ChartTitle.Text = '测试标题' # 设置标题 +chart.FullSeriesCollection(2).Format.Fill.ForeColor.ObjectThemeColor = 10 # 设置系列2的填充颜色 + +chart.ChartData.Workbook.Close() # 关闭workbook窗口 + +doc.Save() +doc.Close() +docApp.Quit() + diff --git a/BaiHu/Tools/YunNan.py b/BaiHu/Tools/YunNan.py index de365397..96af6f67 100644 --- a/BaiHu/Tools/YunNan.py +++ b/BaiHu/Tools/YunNan.py @@ -1,9 +1,15 @@ # pip install pymysql # pip install requests beautifulsoup4 +# 查看结果 +# select * from t_dm_area where province_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98' order by level_id; + +import time + import pymysql import requests from bs4 import BeautifulSoup +import re if __name__ == '__main__': # 遍历 mysql数据库,然后开启爬虫 @@ -28,9 +34,11 @@ if __name__ == '__main__': result: tuple = cursor.fetchall() for e in result: + id = e[0] area_name = e[1] url = "https://baike.baidu.com/item/" + area_name + "?fromModule=lemma_search-box" + print(url) # 发送HTTP GET请求 response = requests.get(url) # 检查请求是否成功 @@ -39,10 +47,22 @@ if __name__ == '__main__': soup = BeautifulSoup(response.text, 'html.parser') # 假设我们要抓取的是

标签中的文字 # 你可以根据需要修改选择器来抓取不同的内容 - h1_text = soup.find('h1').text - print(h1_text) # 打印抓取的文字 + specific_divs = soup.select('div.para_YYuCh.summary_nfAdr.MARK_MODULE') + # 遍历找到的所有特定div标签,并打印它们的文本内容 + for div in specific_divs: + text = div.get_text(strip=True) # 使用get_text()方法获取文本,并去除 + # 使用正则表达式移除所有形如[数字]和[数字-数字]的字符串 + cleaned_text = re.sub(r'\[\d+(?:-\d+)?\]', '', text) + sql = "update t_dm_area set memo=%s where id=%s" + cursor.execute(sql, (cleaned_text, id)) + conn.commit() + print("更新"+area_name+"数据成功") + break else: print('Failed to retrieve the webpage') + + time.sleep(2) # 关闭游标和连接 cursor.close() conn.close() + print("结束") \ No newline at end of file diff --git a/BaiHu/Tools/汇总的内容.xlsx b/BaiHu/Tools/汇总的内容.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..746257ac2b9ef4cb55583f188275d4696d05591a GIT binary patch literal 5467 zcmZ`-1yqz>*Bzuoy1S)e2$7Ix92mM0vFJ_-5s=P7Bn3fw2&G$LBu0=1>6UJX5(W_X zM}5D~U;g)>SaLV)C8=||?v|EJ*KCfk0>e-d`=6Gmi zlOAL9fZBUvS}mzC#J(MwM5pu2sYSgH2R$NV_bD@AlZZx1Xj&OOg~3?M{hAqyLvHLQ z>2I-xhu{{C%;+99EDvmP;xk5X2M9Wv7U%$_-%`1m3pJf6c1eZ?%g9_GySYe382Tf5 z(C7lk@$|lrLxMy+)G*g(v_d^SyOUQX>7DS=haco(2Far>%m?9S1h$xTBEFUou;rsv zUtzNdtjh?OJRVp?%j4hO1#``?qmCKW{JYr`6~iC&$L=M z(aLLF0{{ro>jQTyM-O44pYPIyK6OkW>2qW-l5};huP!)Jd>TUbl%kkKgu7aPo0}p* zx8B8tl#;*p@GCGe%>_jEN(h>@oq z)(F2VKNPAL?JkdaN5Ex?*$i$Pm!5tjyBf_Awm#`CF2RuVHpS zC3%mQgdV;6I12w(#oNW*;jxR0!_Tz*N1;}GbZ)93!!AN5`~+nL3z9xJhANdgonk26 zHRG=1teMWgIExO25r+yR_lC@_Z2GN#Vy1Y~%3l;sj`ftnam@@nYq~yyh7>Vn zHjDd$0K{oBdhFffy)CiE=5EgUQeT$WwRg`@U6dGCQ#Eq^K%3%Awp7nZcTt~KJr9l5 ziNLTp=4OmWWhKFv4kOk&3&f>m1Z;z-EaZfK@F1DM7tihv@k@^)Ey1ebYSn#|fvOg^ zFOxO!2GN@74@{}uKyfp!Icg`%dz= zLgzimL{oIxW}p?K{jJbXZ9HtPtUNu0e=WeDB0=?j)ov5V9!UyvL=A$Ws#c<8){23@ z_x?i{cV)T{hu?I|yv2>0-0rYxIcsb0I}F0`^mL{clQ8Lp16ULd(+ca+SpwP9*2;k$VaVAi*Y7fNI z*akVa&<|g2+g$bdAwDTEgQ+Tdf=+zy`i!*JAUSTwGu!@2>Zl4S>*TX*B66?Yj99+_ z_dr<}eCHT{heNM-9SK2);|4k$B!9y3pHzju)YNVlAqq!=e?)xt9wTw0dOfDK2+>e_ zj+p4raT~X_)8pc0x@0cY$DO$~S#BY}za?PY398ht(W1{(7L5wZ5rW1)&1yeeUD%|5 zb#RZcEVhFYKG<5)&}4Gd!7LXmC34L+Re8t%LnOQL*o3PjP|*Hqaja9r4ZGu)4ub9r z_A2v5JkTWp`-Tx$c7g3cT3Cr?ls9afe&hnHvZ$f(ySbnl?x(5*g8CbGV(WLrTx^kf z*I*3?$4XBrr8LyinJ+0F$LBotuz@i)q*OMEEnmV2hdOxKCb&7zvm z=m-*+y!8l&*W$G{Y98dNTeqCD&8saq@LBHtxaX(RSnf}JAP|Eod#cLvO%epd^>AU+ zUe8myH++#a9hQ$MQxzrcs7!d9FcT^w4jVQV#Wi}OKC8E*Do-Z|Xd7J5N8$Xf`==8Q zv0k*^#Axk*-f8HA!o&8FyOpKBr@NiAjmOUtsIDyk+^5R}5#;Od9jG=G9TC&f1NTAt zwYz#D^@ar3-G*y*^mv5SdLe4PT?3F>!x9ZaqpJF)l`38#Azi>`AAg-;HxDmw#g}nm zt$uKyrodQ_Ft`u1JAZ{{g*ye%sJe0b30t)|iuV&;aWPk;)F}uD$NID}D$Q4&pzEZ- zm#a=4rcO}cn-*43+%E#p)O3SoYDUcto1bf+)laXdPE4+csIcY^z=Zv8+Q_|MPrMD) z_UdiZktt=0!PoZM4rsK+Q8HjzXORK?y?ZxWs?u}x&gHNHfb0M4-qY97>esfL4d!Dq zL`VaK4+HUhk&Ohmr*fH_L&B8I2l=t z-HafZq1anS9XEZxoOJ^Y zRuk%<;~RN4KWHV2n#U3$fPyOeZMko+a9L!jwOR4-94?6Dc{yTLgHwG5+X5&UbaKL8 z>mcsEZ;FCejN6zJB^9me$@#(LcS`i5pXoymi{c3264;*t5n0QhRNQF9zNmB_ASc<6 z<8GeaxGP%Ae`z+k!7#azqB$Y*`NVed*^nPmOW!4*Y9ZA}aEp-829!$p)h5yhis4)w zsH89=7aTzzo;9T!^aREFDfyj466)jIZ25BwHg6}Rym;0_{seX#F(>fWzkl5mJEod1rd4R+9D6Jvczqm&OkY`g>CMUfFt6!R=er|Ih_s2}*xO^h zmi3J}N0FhSJduRtlEipLBkK3M%T0NL*}n_#o^o{gh|O6~t5ViValY9*2na+mt?ehD zkts*0fV-FKR_?tsjYPQ&0w(JbnCGw-ONqTXM=s@XX|B z%*sy@VSrr<8Xu6uHdho$9}$x#Bcch_nxAc`ss8$4;G}%Z!wc?=t9>clWbYqVmA84Qj_Q#*TaV3=rh|`;#w(vu-P* zAGv_tLsYA9CkYF)JZe(G=Xz(57HaWK_~PP7|8P7Jd0+OlJ2qAN;y6q0Y-e1!PoV%a zEASlkqVPpl;K50jysT(TJvQ^^E(SDGMY4-*YK?5dnp7ct(UCHQbeJ5Q7{d;5WMFb_ zF5{7P+Y|7luQ!%BfMR#^@V?r3h>ix81fV=Z7r&uLYRmPJ_}WuGh@=fYuJyv>lBFY{ zoOsu_H+W7i=?dKX6g|#hb^C9?$eYAq4N7(79zGjR)rX{%AMYXltz0bpo~gOSFiyuhnS5)Tt1CTUnCG1oyiMu0MjVWJDMBy|6Qun{!OE&||#i(=+%P90`x{uP;d%->K`taoF;3>kuwsOz2Fi z#FVgD4)48Nja6{Rf1;O~Zs;+$wo-!tBH1;lWm1Ctp1+cz59uNn;#OLy0`)u0C~~0% z&WDEk{c(wuu>(gEPCOiAO@Y%tEaKf9DeDUwILER6{A3-55+`U~v4(cAV)#6A4 zySi1)BAn3S#E--8)^gb_L^$$BkWwMOCjGZnL=hZ%h|fU!Me#9Dv#rtl!}(6)a~f09 zieEl8QY|)lMCv(lsUO)(RW&pz-N{ZiJ{DMB%6Ec3N(k=Z!3Yp_Qh0^!Hob57sGC3L zB)@gF#?w9-Csl=0sf0s&YKYjz?MPi*l_NU`-QULc47|K5^&G( zQ@^|E%#yw;Lh+WglJZTeGulJ1gKc_U{DIihXKdL@v+R4_$qx)#yNtzEpFB(nZCF3Sa3kQoNrlN;Lq_hQ6Q*{kGY!O09p(98_ zT1HXRY?|OIu%VKG|3b#5I&rE$|JqB*MplC}afM)|Lj4?8OKmDIcyOBjcviUwNHxo} zh(Ga%H<%coA%ApQW2au{7Lq%RLv*GqdlI%uk8@t{v+u^woi*GUQowpFP#yVJ!hc7% zV$Yf{V^7yi((K;xSilIkD@tL5M%2ADl z1HEJKhG!d+ArBbkBy1%4GW_2+jC8$q zWIF%Mb!5B$QWPL}l;F8_#W6J!x1DpNTLE2k14@Q&1uR`GblhEBJ%lY>+^v3gL754C zKaEl-G7KIvG@D|TN*cPOVCHV0J(!d-7h0pHQp&FX#Rp%8dijnhuM zv2;iX3j#cPh8uk%Ani#h*}+Twc%yDY^rElsd5mngpqduEDgukDR6Y}x|_`)ub0EIHjQ#;|wdNU3gq4eF0 zCr?;CkYKCnwF90>$2M)sV~*Zho<~r8eRXz9As+bmy?NtS48jjBDj6mKK>EK$edOx; zTT?}cB@xmvWY{?g=vc+eB!%2EIbr)tdkRiMosA)LXKodzxPIc{xUpB{R*m1?thrv< zH*zmHorz2Pq$6#c_)UQWrZ`$>-3|U-%-(_4;{wWhWqM;WJd5FCD#Cy(AB^`V)#XHw(WVLiv%stWbi zDkEye-GT51%u!*dJPj3I>APOpU7bXIes`yXt%M13I(bbCb9Lh;y1e*soG|s02ISz6 zF%@`F6O79vGNY@70yxU#sUW#vPXDL|F-r`VO=$EX$>L*mS&<6-rMfkK_+F;=X+rb9 zla7#J{EN+Uw{jG%S3oIV|5lS@v{N3t!3IvplNZZyVSQ94iK%Xcy85 z#e9<0o_Aead-FBR#WVoj2xDBM{MGXR`xpxytzVzIpr6aX47IKjT%E%Fh64a~LBZ$@ z_+K-btH7(Xia)?)bS?h>lZ&f7R~`Kyo_cipp?Ut*-(O|9YDE9AfY1%#KP>;TrB}gM z-P9j29OvJ?)m4V8j^qzR6Yg(@|N4`w(5p@AALugvFX+{l^(w*DBk&JFC)$$#p9AqK k{A#iPfit2H)c?Z&DSaIXHd+z@fB^k{j*jyu@}Ix{2a*U>ApigX literal 0 HcmV?d00001