You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

113 lines
4.8 KiB

2 years ago
from docx.shared import Cm
from pptx.enum.shapes import MSO_SHAPE_TYPE
import hashlib
from docx import Document
from pptx import Presentation
def WordReplaceText(doc, tag, pv):
    """Replace every occurrence of ``tag`` with ``pv`` in the body paragraphs.

    A placeholder may be split across several runs, so the run texts are
    accumulated until the placeholder shows up in the accumulated string.
    The accumulated (and substituted) text is then written into the run
    that completed the placeholder, and the runs consumed on the way are
    emptied.  Text left over after the last match is written back into the
    final run, so the paragraph text is unchanged apart from the
    substitution.

    Args:
        doc: Object exposing ``paragraphs``; each paragraph exposes ``text``
            and ``runs``, and each run a readable/writable ``text``
            (presumably a python-docx ``Document`` - confirm with callers).
        tag: Placeholder string to look for.  An empty string is ignored.
        pv: Replacement string.
    """
    if tag == "":
        # str.replace("", pv) would insert pv between every character.
        return

    for paragraph in doc.paragraphs:
        if tag not in paragraph.text:
            continue

        pending = ''  # text of the runs consumed since the last match
        runs = paragraph.runs
        last_index = len(runs) - 1
        for i, run in enumerate(runs):
            pending += run.text  # merge the run strings
            if tag in pending:
                # The placeholder is complete: this run takes over the
                # merged text with the placeholder substituted.
                run.text = pending.replace(tag, pv)
                pending = ''
            elif i == last_index:
                # No (further) match up to the end of the paragraph: put the
                # remaining text back.  Assigning ``text`` instead of calling
                # ``add_text`` keeps the write symmetric with the read above.
                run.text = pending
            else:
                # Not matched yet: the text is carried forward in ``pending``.
                run.text = ''
def WordReplacePic(doc, tag, pic, width=14.63):
    """Swap each body paragraph that mentions ``tag`` for a picture.

    Every run of a matching paragraph is cleared, and the picture is added
    to the first of those runs, so the whole paragraph content - not only
    the placeholder - gives way to the image.

    Args:
        doc: Object exposing ``paragraphs`` whose runs provide ``clear()``
            and ``add_picture()`` (presumably a python-docx ``Document`` -
            confirm with callers).
        tag: Placeholder string that marks a paragraph for replacement.
        pic: Image handed to ``run.add_picture`` unchanged.
        width: Picture width in centimetres (wrapped in ``Cm``).
    """
    for paragraph in doc.paragraphs:
        if tag not in paragraph.text:
            continue
        for position, run in enumerate(paragraph.runs):
            run.clear()
            # Only the first run of the paragraph receives the picture.
            if position == 0:
                run.add_picture(pic, width=Cm(width))
def WordReplaceTextInTable(doc, tag, pv):
    """Overwrite table-cell paragraphs that mention ``tag`` with ``pv``.

    For every paragraph of every cell whose text contains ``tag``, all runs
    are cleared and ``pv`` is added to the first run.  The complete
    paragraph text is therefore replaced by ``pv``, not just the
    placeholder.

    Args:
        doc: Object exposing ``tables`` -> ``rows`` -> ``cells`` ->
            ``paragraphs`` -> ``runs`` (presumably a python-docx
            ``Document`` - confirm with callers).
        tag: Placeholder string that marks a paragraph for replacement.
        pv: Text written into the first run of a matching paragraph.
    """
    cells = (
        cell
        for table in doc.tables
        for row in table.rows
        for cell in row.cells
    )
    for cell in cells:
        # Assigning cell.text would be enough if only the content mattered;
        # going through the paragraph's runs is meant to leave the existing
        # formatting untouched.
        for paragraph in cell.paragraphs:
            if tag not in paragraph.text:
                continue
            for position, run in enumerate(paragraph.runs):
                run.clear()
                # The first run carries the new text; the rest stay empty.
                if position == 0:
                    run.add_text(pv)
def PptReplaceText(prs, search_str, repl_str):
    """Replace ``search_str`` with ``repl_str`` in the text of every slide.

    Two kinds of shapes are handled on each slide:

    * Shapes exposing ``text``: every run of every paragraph that contains
      the search string is rewritten in place, which leaves the run objects
      (and whatever formatting they carry) alone.  A placeholder that is
      split across several runs is not matched.
    * Group shapes: each member with a text frame is rewritten paragraph by
      paragraph through ``paragraph.text``.

    NOTE(review): groups nested inside groups are not descended into.

    Args:
        prs: Object exposing ``slides`` whose items expose ``shapes``
            (presumably a python-pptx ``Presentation`` - confirm with
            callers).
        search_str: Text to look for; converted with ``str()``.  An empty
            value is ignored.
        repl_str: Replacement text; converted with ``str()``.
    """
    # Convert once so the containment checks and the replacement agree;
    # previously a non-string search value failed in ``find`` before the
    # ``str()`` conversion was ever reached.
    search_str = str(search_str)
    repl_str = str(repl_str)
    if not search_str:
        # Replacing "" would insert repl_str between every character.
        return

    for slide in prs.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                if search_str not in shape.text:
                    continue
                # Walk all paragraphs, not only the first one, so that
                # placeholders further down in the shape are replaced too.
                for paragraph in shape.text_frame.paragraphs:
                    for run in paragraph.runs:
                        if search_str in run.text:
                            run.text = run.text.replace(search_str, repl_str)
            elif shape.shape_type == MSO_SHAPE_TYPE.GROUP:
                for member in shape.shapes:
                    if not member.has_text_frame:
                        continue
                    if search_str not in member.text:
                        continue
                    for paragraph in member.text_frame.paragraphs:
                        cur_text = paragraph.text
                        if search_str in cur_text:
                            paragraph.text = cur_text.replace(search_str, repl_str)
def PptReplacePic(prs, newpic, oldpic):
    """Replace every picture whose image bytes equal ``oldpic`` with ``newpic``.

    The sample file ``oldpic`` is fingerprinted with MD5.  Each shape whose
    ``image.blob`` has the same fingerprint gets a new picture added at the
    same left/top/width/height, after which the original shape element is
    removed from its parent.

    Args:
        prs: Object exposing ``slides`` whose items expose ``shapes``
            (presumably a python-pptx ``Presentation`` - confirm with
            callers).
        newpic: Image handed to ``slide.shapes.add_picture`` unchanged.
        oldpic: Path of the sample image file to look for.

    Raises:
        OSError: If ``oldpic`` cannot be opened or read.
    """
    # Fingerprint of the old sample picture (e.g. a logo).  The context
    # manager makes sure the file handle is released again.
    with open(oldpic, "rb") as image_file:
        md5finger = hashlib.md5(image_file.read()).hexdigest()

    for slide in prs.slides:
        # Iterate over a snapshot: shapes are added and removed on the way.
        for shape in list(slide.shapes):
            try:
                blob = shape.image.blob
            except Exception:
                # Best effort: a shape that cannot provide image bytes is
                # not a candidate.  ``Exception`` (rather than a bare
                # ``except``) lets KeyboardInterrupt/SystemExit through.
                continue
            if hashlib.md5(blob).hexdigest() != md5finger:
                continue
            slide.shapes.add_picture(
                newpic, shape.left, shape.top, shape.width, shape.height
            )
            element = shape.element
            element.getparent().remove(element)
def ReplaceTxtInTable(ppt, oldStr, newStr):
    """Set table cells whose text equals ``oldStr`` to ``newStr``.

    Every shape on every slide that reports ``has_table`` is scanned cell
    by cell.  The comparison is an exact equality test on the cell text,
    not a substring search.

    Args:
        ppt: Object exposing ``slides`` whose items expose ``shapes``
            (presumably a python-pptx ``Presentation`` - confirm with
            callers).
        oldStr: Cell text to look for (exact match).
        newStr: Text assigned to each matching cell.
    """
    # All slides
    for slide in ppt.slides:
        for shape in slide.shapes:
            # All tables
            if not shape.has_table:
                continue
            table = shape.table
            for row_idx in range(len(table.rows)):
                for col_idx in range(len(table.columns)):
                    cell = table.cell(row_idx, col_idx)
                    if cell.text == oldStr:
                        cell.text = newStr