"""Helpers for filling Word (python-docx) and PowerPoint (python-pptx) templates.

Each function mutates the document / presentation object it is given in place
and returns ``None``; saving the result is left to the caller.
"""
import hashlib

from docx import Document
from docx.shared import Cm
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE


def _replace_tag_in_docx_paragraph(paragraph, tag: str, pv: str) -> None:
    """Replace every occurrence of *tag* in one python-docx paragraph with *pv*.

    A tag may be split over several runs, so run texts are accumulated until
    the accumulated text contains the tag.  The run that completes the tag
    receives the accumulated text (with the tag substituted) and the runs
    consumed before it are emptied; as a consequence the text that preceded
    the tag takes on the formatting of that run.
    """
    runs = paragraph.runs
    if tag not in paragraph.text:
        return
    # The tag can be visible in paragraph.text without being present in the
    # runs themselves; merging runs then would only destroy formatting.
    if tag not in "".join(run.text for run in runs):
        return

    pending = ""
    last_index = len(runs) - 1
    for index, run in enumerate(runs):
        pending += run.text
        if tag in pending:
            # The tag is complete: this run takes over everything accumulated.
            run.text = pending.replace(tag, pv)
            pending = ""
        elif index == last_index:
            # Trailing text that never completed another tag must not be lost.
            run.text = pending
        else:
            # Text is carried forward in `pending`, so empty this run.
            run.text = ""


def WordReplaceText(doc, tag: str, pv: str) -> None:
    """Replace *tag* with *pv* in the body paragraphs of a Word document.

    Args:
        doc: Object exposing ``paragraphs`` (only ``doc.paragraphs`` is
            visited; tables are handled by ``WordReplaceTextInTable``).
        tag: Placeholder text to search for.  An empty tag is ignored.
        pv: Replacement text.
    """
    if not tag:
        return
    for paragraph in doc.paragraphs:
        _replace_tag_in_docx_paragraph(paragraph, tag, pv)


def WordReplacePic(doc, tag: str, pic, width: float = 14.63) -> None:
    """Replace each body paragraph containing *tag* with a picture.

    The whole content of a matching paragraph is discarded: every run is
    cleared and the picture is inserted into the first run.

    Args:
        doc: Object exposing ``paragraphs``.
        tag: Placeholder text marking where the picture goes.  An empty tag
            is ignored (it would otherwise match every paragraph).
        pic: Picture passed straight to ``run.add_picture``.
        width: Picture width in centimetres.
    """
    if not tag:
        return
    for paragraph in doc.paragraphs:
        if tag not in paragraph.text:
            continue
        for index, run in enumerate(paragraph.runs):
            run.clear()
            if index == 0:
                run.add_picture(pic, width=Cm(width))


def WordReplaceTextInTable(doc, tag: str, pv: str) -> None:
    """Replace *tag* with *pv* inside the cells of the document's tables.

    Cell text is edited run by run rather than through ``cell.text`` so the
    existing formatting is kept.  Only the tag itself is substituted; other
    text in the same cell paragraph is preserved.

    Args:
        doc: Object exposing ``tables`` (only ``doc.tables`` is visited).
        tag: Placeholder text to search for.  An empty tag is ignored.
        pv: Replacement text.
    """
    if not tag:
        return
    # NOTE(review): merged cells are assumed to be reported once per grid
    # position by ``row.cells``; remembering the underlying element makes sure
    # each physical cell is processed only once even if *pv* contains *tag*.
    seen_cells = set()
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_element = cell._tc
                if cell_element in seen_cells:
                    continue
                seen_cells.add(cell_element)
                for paragraph in cell.paragraphs:
                    _replace_tag_in_docx_paragraph(paragraph, tag, pv)


def _iter_pptx_text_shapes(shapes):
    """Yield every shape that has a text frame, descending into group shapes."""
    for shape in shapes:
        # has_text_frame is tested first so shape_type is only queried for
        # shapes that cannot hold text themselves.
        if shape.has_text_frame:
            yield shape
        elif shape.shape_type == MSO_SHAPE_TYPE.GROUP:
            yield from _iter_pptx_text_shapes(shape.shapes)


def _replace_in_pptx_paragraph(paragraph, search: str, repl: str) -> None:
    """Replace *search* with *repl* in one python-pptx paragraph.

    When every occurrence lies inside a single run the runs are edited
    individually, which keeps their formatting.  Otherwise (the text is split
    across runs or lives outside the runs) the paragraph text is rewritten as
    a whole.
    """
    whole_text = paragraph.text
    if search not in whole_text:
        return
    runs = paragraph.runs
    occurrences_in_runs = sum(run.text.count(search) for run in runs)
    if occurrences_in_runs == whole_text.count(search):
        for run in runs:
            if search in run.text:
                run.text = run.text.replace(search, repl)
    else:
        paragraph.text = whole_text.replace(search, repl)


def PptReplaceText(prs, search_str, repl_str) -> None:
    """Replace *search_str* with *repl_str* in the text shapes of every slide.

    All paragraphs of every shape with a text frame are processed, including
    shapes nested inside group shapes.

    Args:
        prs: Object exposing ``slides``.
        search_str: Text to search for; converted with ``str()``.  An empty
            search string is ignored.
        repl_str: Replacement; converted with ``str()``.
    """
    search = str(search_str)
    repl = str(repl_str)
    if not search:
        return
    for slide in prs.slides:
        for shape in _iter_pptx_text_shapes(slide.shapes):
            for paragraph in shape.text_frame.paragraphs:
                _replace_in_pptx_paragraph(paragraph, search, repl)


def PptReplacePic(prs, newpic, oldpic) -> None:
    """Swap every picture whose bytes match the file *oldpic* for *newpic*.

    Pictures are matched by the MD5 digest of their image bytes.  The new
    picture is added with the position and size of the matched shape, and the
    matched shape is then removed from the slide.

    Args:
        prs: Object exposing ``slides``.
        newpic: Picture passed straight to ``slide.shapes.add_picture``.
        oldpic: Path of the sample image file to look for.

    Raises:
        OSError: If *oldpic* cannot be opened or read.
    """
    # Fingerprint of the sample image that is to be replaced.
    with open(oldpic, "rb") as image_file:
        wanted_digest = hashlib.md5(image_file.read()).hexdigest()

    for slide in prs.slides:
        # Iterate over a copy: shapes are added and removed inside the loop.
        for shape in list(slide.shapes):
            try:
                blob = shape.image.blob
            except (AttributeError, ValueError, KeyError):
                # Not a picture (no ``image`` attribute), or presumably a
                # picture without an embedded image - verify the exception
                # types against the installed python-pptx version.
                continue
            if hashlib.md5(blob).hexdigest() != wanted_digest:
                continue
            element = shape.element
            slide.shapes.add_picture(
                newpic, shape.left, shape.top, shape.width, shape.height
            )
            element.getparent().remove(element)


def ReplaceTxtInTable(ppt, oldStr, newStr) -> None:
    """Set table cells whose text equals *oldStr* to *newStr* on every slide.

    Unlike the other helpers this is an exact match on the complete cell
    text, not a substring replacement.

    Args:
        ppt: Object exposing ``slides``.
        oldStr: Cell text to look for.
        newStr: Text written to every matching cell.
    """
    for slide in ppt.slides:
        for shape in slide.shapes:
            if not shape.has_table:
                continue
            for row in shape.table.rows:
                for cell in row.cells:
                    if cell.text == oldStr:
                        cell.text = newStr