|
|
@ -27,6 +27,8 @@ import time
|
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
|
from selenium.webdriver.chrome.service import Service as ChromeService
|
|
|
|
from selenium.webdriver.chrome.service import Service as ChromeService
|
|
|
|
import requests
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from Util.LightRagUtil import initialize_pg_rag
|
|
|
|
from Util.PostgreSQLUtil import init_postgres_pool
|
|
|
|
from Util.PostgreSQLUtil import init_postgres_pool
|
|
|
|
from Util.WxGzhUtil import init_wechat_browser, get_article_content
|
|
|
|
from Util.WxGzhUtil import init_wechat_browser, get_article_content
|
|
|
|
|
|
|
|
|
|
|
@ -72,9 +74,21 @@ async def save_article_to_db(pool, article_title, account_name, article_url, pub
|
|
|
|
if await is_article_exist(pool, article_url):
|
|
|
|
if await is_article_exist(pool, article_url):
|
|
|
|
logger.info(f"文章已存在,跳过保存: {article_url}")
|
|
|
|
logger.info(f"文章已存在,跳过保存: {article_url}")
|
|
|
|
return
|
|
|
|
return
|
|
|
|
# 准备在这里调用 lightrag进行知识库构建
|
|
|
|
# 在这里调用 lightrag进行知识库构建
|
|
|
|
# TODO
|
|
|
|
workspace = 'ChangChun'
|
|
|
|
|
|
|
|
# 使用PG库后,这个是没有用的,但目前的项目代码要求必传,就写一个吧。
|
|
|
|
|
|
|
|
WORKING_DIR = f"./output"
|
|
|
|
|
|
|
|
docx_name = f"{account_name}_{article_title}" # 组合来源和标题作为文档名
|
|
|
|
|
|
|
|
logger.info(f"开始处理文档: {docx_name}")
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
|
|
rag = await initialize_pg_rag(WORKING_DIR=WORKING_DIR, workspace=workspace)
|
|
|
|
|
|
|
|
await rag.ainsert(input=content, file_paths=[docx_name])
|
|
|
|
|
|
|
|
finally:
|
|
|
|
|
|
|
|
if rag:
|
|
|
|
|
|
|
|
await rag.finalize_storages()
|
|
|
|
|
|
|
|
if pool:
|
|
|
|
|
|
|
|
await pool.close()
|
|
|
|
|
|
|
|
logger.info(f"保存文档到知识库成功: {docx_name}")
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
async with pool.acquire() as conn:
|
|
|
|
async with pool.acquire() as conn:
|
|
|
|
await conn.execute('''
|
|
|
|
await conn.execute('''
|
|
|
|