|
|
|
@ -15,3 +15,30 @@ html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
|
|
|
|
|
# The first line of the page text is the title; split it off from the rest.
title = html_content.partition('\n')[0]
print(title)
|
|
|
|
|
|
|
|
|
|
# Keep only the text that follows the first blank (whitespace-only) line of
# html_content. If there is no blank line, keep the original text unchanged.
lines = html_content.split('\n')

# Index of the first blank line, or None when every line has content.
first_blank_index = next(
    (index for index, line in enumerate(lines) if line.strip() == ""),
    None,
)
found_empty_line = first_blank_index is not None

if found_empty_line:
    # Every retained line is newline-terminated (later blank lines included).
    # Built with a single join instead of repeated "+=" in a loop.
    content_after_empty_line = "".join(
        line + "\n" for line in lines[first_blank_index + 1:]
    )
else:
    # No blank line found: keep the original content.
    content_after_empty_line = html_content
|
|
|
|
|
|
|
|
|
|
# Print every line of the retained content that is not blank.
for text_line in content_after_empty_line.split("\n"):
    if text_line.strip():
        print(text_line)

# Close the browser.
driver.quit()
|
|
|
|
|