From 78633b321cb5981b8a80ba0a94aed0022cf66f67 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 09:43:31 +0800
Subject: [PATCH] 'commit'

---
Note: after the title is printed, the added lines drop everything up to and
including the first blank line of html_content (keeping the whole text when
there is no blank line), print the remaining non-blank lines, and then quit
the driver. The context comment below is kept verbatim because context lines
must match the target file for the patch to apply.

 dsLightRag/Test/T3_GetArticle.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/dsLightRag/Test/T3_GetArticle.py b/dsLightRag/Test/T3_GetArticle.py
index 6dae6c0b..baaecd48 100644
--- a/dsLightRag/Test/T3_GetArticle.py
+++ b/dsLightRag/Test/T3_GetArticle.py
@@ -15,3 +15,30 @@ html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
 # 第一行是标题,分离出来
 title = html_content.split('\n')[0]
 print(title)
+
+# Walk html_content line by line; at the first blank line, drop everything before it and keep only what follows
+lines = html_content.split('\n')
+content_after_empty_line = ""
+found_empty_line = False
+
+for line in lines:
+    if not found_empty_line and line.strip() == "":
+        # Found the first blank line; skip it and start collecting from the next line
+        found_empty_line = True
+        continue
+
+    if found_empty_line:
+        # Append every line after the first blank line to the result
+        content_after_empty_line += line + "\n"
+
+# If no blank line was found, keep the original content
+if not found_empty_line:
+    content_after_empty_line = html_content
+
+for x in content_after_empty_line.split("\n"):
+    if x.strip() == "" :
+        continue
+    print(x)
+
+# Close the browser
+driver.quit()