'commit'

1 week ago · 78633b321c
parent 3f51637bcf
commit 78633b321c
1 changed files with 27 additions and 0 deletions
--- a/dsLightRag/Test/T3_GetArticle.py
+++ b/dsLightRag/Test/T3_GetArticle.py
@ -15,3 +15,30 @@ html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
 # The first line of the extracted text is the title; split it out
 title = html_content.split('\n')[0]
 print(title)
+
+# Walk html_content line by line; once the first blank line is found, drop everything before it and keep only what follows
+lines = html_content.split('\n')
+content_after_empty_line = ""
+found_empty_line = False
+
+for line in lines:
+    if not found_empty_line and line.strip() == "":
+        # Found the first blank line: set the flag and skip the blank line itself
+        found_empty_line = True
+        continue
+
+    if found_empty_line:
+        # Append every line after the first blank line to the result
+        content_after_empty_line += line + "\n"
+
+# If no blank line was found, keep the original content unchanged
+if not found_empty_line:
+    content_after_empty_line = html_content
+
+for x in content_after_empty_line.split("\n"):  # print the kept content, skipping whitespace-only lines
+    if x.strip() == "" :
+        continue
+    print(x)
+
+# Close the browser
+driver.quit()