|
|
|
@ -4,7 +4,7 @@ from Util.SplitDocxUtil import SplitDocxUtil
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def split_into_blocks(text):
|
|
|
|
|
"""按行遍历文本,发现'问题X'或'话题X'时开始分割"""
|
|
|
|
|
"""按行遍历文本,发现'问题X'或'话题X'时开始分割,但去除这些前缀字符串"""
|
|
|
|
|
blocks = []
|
|
|
|
|
current_block = []
|
|
|
|
|
in_block = False
|
|
|
|
@ -15,8 +15,10 @@ def split_into_blocks(text):
|
|
|
|
|
blocks.append('\n'.join(current_block))
|
|
|
|
|
current_block = []
|
|
|
|
|
in_block = True
|
|
|
|
|
# 去除前缀字符串
|
|
|
|
|
line = line[line.find(' ')+1:] if ' ' in line else ''
|
|
|
|
|
|
|
|
|
|
if in_block:
|
|
|
|
|
if in_block and line: # 只添加非空行
|
|
|
|
|
current_block.append(line)
|
|
|
|
|
|
|
|
|
|
if current_block:
|
|
|
|
|