diff --git a/dsLightRag/ShiTi/T3_DocxToMd.py b/dsLightRag/ShiTi/T3_DocxToMd.py index 1a00b600..53ddd8e9 100644 --- a/dsLightRag/ShiTi/T3_DocxToMd.py +++ b/dsLightRag/ShiTi/T3_DocxToMd.py @@ -89,15 +89,19 @@ async def main(): question_types = ["不定项选择", "单选题", "多选题", "填空题", "判断题", "完型填空题", "计算题"] # 按 【题型】 分隔开 - content=content.replace("\n\n","\n") + content = content.replace("\n\n", "\n") questions = content.split('【题型】') - idx=0 + idx = 0 for q in questions: - idx=idx+1 - print("第"+str(idx)+"题:") - if q.strip() == "" or q=='\r\n' or q=='\n': + # 干掉 【题型】前面的文档标题,比如: # 《动能定理》巩固练习 + if idx == 0 and q != "": + idx = idx + 1 continue + # 干掉空行 + if q.strip() == "" or q == '\n': + continue + q='【题型】'+q print(q) print("\n")