main
黄海 9 months ago
parent 6fc9d2aef7
commit a8d60689bc

@ -0,0 +1,43 @@
# pip install pywin32
import win32com
from win32com.client import Dispatch
# Insert a clustered column chart into an existing Word document at a bookmark,
# fill the chart's embedded worksheet with test data, style it, and save.
#
# Office enumeration values used below (late-bound COM exposes no named constants).
WD_ALERTS_NONE = 0             # WdAlertLevel.wdAlertsNone
WD_WRAP_INLINE = 7             # WdWrapType.wdWrapInline
WD_DO_NOT_SAVE_CHANGES = 0     # WdSaveOptions.wdDoNotSaveChanges
XL_COLUMN_CLUSTERED = 51       # XlChartType.xlColumnClustered
CHART_STYLE = 201              # chart style index passed to AddChart2
MSO_THEME_COLOR_ACCENT_6 = 10  # MsoThemeColorIndex.msoThemeColorAccent6

docApp = win32com.client.Dispatch('Word.Application')
doc = None
try:
    docApp.Visible = True
    docApp.DisplayAlerts = WD_ALERTS_NONE
    doc = docApp.Documents.Open('c:/1.docx')

    # Create the chart. The bookmark named "插入图表位置" must already exist in
    # the document. Select() is called for its side effect (moving the
    # selection to the bookmark); its return value is what gets passed as Top.
    # NOTE(review): presumably the chart lands at the current selection -
    # confirm, and consider passing Anchor=<bookmark>.Range instead.
    shape_chart = doc.Shapes.AddChart2(
        Style=CHART_STYLE,
        Type=XL_COLUMN_CLUSTERED,
        Top=doc.Bookmarks("插入图表位置").Select(),
    )
    shape_chart.WrapFormat.Type = WD_WRAP_INLINE  # lay the chart out inline with text

    # Configure the chart embedded in Word.
    chart = shape_chart.Chart
    worksheet = chart.ChartData.Workbook.Worksheets(1)  # worksheet backing the chart data
    # NOTE(review): this range is superseded by the second SetSourceData call
    # below; kept to preserve the original call sequence.
    chart.SetSourceData("Sheet1!$A$1:$C$4")

    # Test data for the clustered column chart: a header row, then one row per year.
    chart_data = [["", "系列A", "系列B", "系列C", "系列D"],
                  [2020, 2, 4, 2, 3],
                  [2019, 4, 5, 3, 2]]

    # Clear the worksheet's default sample data.
    worksheet.Range("A1:D5").value = None

    # Write the test data cell by cell (COM cell indices are 1-based).
    for row_index, row in enumerate(chart_data):
        for column_index, value in enumerate(row):
            worksheet.Cells(row_index + 1, column_index + 1).Value = value
    chart.SetSourceData("Sheet1!$A$1:$E$3")  # must match the extent of chart_data

    # Chart styling examples.
    chart.ChartTitle.Text = '测试标题'  # chart title
    # Fill colour of the second series.
    chart.FullSeriesCollection(2).Format.Fill.ForeColor.ObjectThemeColor = MSO_THEME_COLOR_ACCENT_6
    chart.ChartData.Workbook.Close()  # close the chart's workbook window
    doc.Save()
finally:
    # Always release Word, even when a COM call above fails; otherwise an
    # orphaned WINWORD.EXE keeps the document locked. On the success path the
    # document has already been saved, so closing without saving loses nothing;
    # on the failure path it avoids persisting a half-edited document.
    if doc is not None:
        doc.Close(WD_DO_NOT_SAVE_CHANGES)
    docApp.Quit()

@ -1,9 +1,15 @@
# pip install pymysql
# pip install requests beautifulsoup4
# 查看结果
# select * from t_dm_area where province_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98' order by level_id;
import time
import pymysql
import requests
from bs4 import BeautifulSoup
import re
if __name__ == '__main__':
# 遍历 mysql数据库然后开启爬虫
@ -28,9 +34,11 @@ if __name__ == '__main__':
# Fetch all rows from the cursor (the query that produced them is outside this view).
result: tuple = cursor.fetchall()
for e in result:
# Column 0 is used as the row id and column 1 as the area name.
# NOTE(review): the name `id` shadows the Python builtin of the same name.
id = e[0]
area_name = e[1]
# Build the Baidu Baike entry URL from the area name.
url = "https://baike.baidu.com/item/" + area_name + "?fromModule=lemma_search-box"
print(url)
# Send the HTTP GET request.
# NOTE(review): no timeout is passed, so a stalled connection can block here indefinitely.
response = requests.get(url)
# Check whether the request succeeded (the check itself is outside this view).
@ -39,10 +47,22 @@ if __name__ == '__main__':
# Parse the response body as HTML.
soup = BeautifulSoup(response.text, 'html.parser')
# Scrape the text of the page's <h1> tag;
# change the selector to scrape different content.
# NOTE(review): soup.find('h1') returns None when the page has no <h1>, so .text would raise AttributeError.
h1_text = soup.find('h1').text
print(h1_text) # Print the scraped text
specific_divs = soup.select('div.para_YYuCh.summary_nfAdr.MARK_MODULE')
# Walk the matched divs and store the cleaned text in t_dm_area.memo for the current row id.
# NOTE(review): presumably the `break` below belongs to this loop, so only the first match is stored - confirm.
for div in specific_divs:
text = div.get_text(strip=True) # Extract the text with get_text(), stripping whitespace
# Use a regular expression to remove every string of the form [digits] or [digits-digits].
cleaned_text = re.sub(r'\[\d+(?:-\d+)?\]', '', text)
sql = "update t_dm_area set memo=%s where id=%s"
cursor.execute(sql, (cleaned_text, id))
conn.commit()
print("更新"+area_name+"数据成功")
break
# NOTE(review): presumably the else branch of the request-success check that sits outside this view - confirm.
else:
print('Failed to retrieve the webpage')
# Pause for 2 seconds.
time.sleep(2)
# Close the cursor and the connection.
cursor.close()
conn.close()
print("结束")

Binary file not shown.
Loading…
Cancel
Save