import asyncio
import sys
import loguru
from raganything import RAGAnything, RAGAnythingConfig
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
from Config.Config import *
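# The wildcard import above is assumed to provide the model and API constants
# used below: LLM_MODEL_NAME, LLM_API_KEY, LLM_BASE_URL, VISION_MODEL_NAME,
# VISION_API_KEY, VISION_BASE_URL, EMBED_MODEL_NAME, EMBED_API_KEY, EMBED_BASE_URL.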

async def main():
    # Configure logging at the start of main
    loguru.logger.remove()  # Remove loguru's default handler
    loguru.logger.add(sys.stderr, level="INFO")  # Log INFO and above to the console

    # Create RAGAnything configuration
    config = RAGAnythingConfig(
        working_dir="./rag_storage",
        mineru_parse_method="auto",
        enable_image_processing=True,
        enable_table_processing=True,
        enable_equation_processing=True,
    )

    # Define LLM model function
    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
        return openai_complete_if_cache(
            LLM_MODEL_NAME,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=LLM_API_KEY,
            base_url=LLM_BASE_URL,
            **kwargs,
        )

    # Define vision model function for image processing
    def vision_model_func(
        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
    ):
        if image_data:
            return openai_complete_if_cache(
                VISION_MODEL_NAME,  # Set to the name of SiliconFlow's free vision model
                "",
                system_prompt=None,
                history_messages=[],
                messages=[
                    {"role": "system", "content": system_prompt}
                    if system_prompt
                    else None,
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{image_data}"
                                },
                            },
                        ],
                    }
                    if image_data
                    else {"role": "user", "content": prompt},
                ],
                api_key=VISION_API_KEY,
                base_url=VISION_BASE_URL,
                **kwargs,
            )
        else:
            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)

    # Define embedding function
    embedding_func = EmbeddingFunc(
        embedding_dim=1024,  # Set to the embedding dimension of SiliconFlow's free embedding model
        max_token_size=8192,
        func=lambda texts: openai_embed(
            texts,
            model=EMBED_MODEL_NAME,
            api_key=EMBED_API_KEY,
            base_url=EMBED_BASE_URL,
        ),
    )

    # Initialize RAGAnything
    rag = RAGAnything(
        config=config,
        llm_model_func=llm_model_func,
        vision_model_func=vision_model_func,
        embedding_func=embedding_func,
    )

    # file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
    file_path = "./Txt/驿来特平台安全.docx"

    # Parse the document and ingest its text and multimodal content
    await rag.process_document_complete(
        file_path=file_path,
        output_dir="./output",
        parse_method="auto",
    )
print("Processing complete.")
if __name__ == "__main__":
    asyncio.run(main())