main
HuangHai 3 weeks ago
parent b0ee967337
commit 717e7bd891

@@ -4,4 +4,7 @@
     <option name="sdkName" value="D:\anaconda3\envs\raganything" />
   </component>
   <component name="ProjectRootManager" version="2" project-jdk-name="D:\anaconda3\envs\raganything" project-jdk-type="Python SDK" />
+  <component name="PythonCompatibilityInspectionAdvertiser">
+    <option name="version" value="3" />
+  </component>
 </project>

@@ -41,3 +41,11 @@ python examples/text_format_test.py --check-reportlab --file dummy
 # MinerU
 https://github.com/opendatalab/MinerU/blob/master/README_zh-CN.md
+# SiliconFlow vision models
+https://cloud.siliconflow.cn/sft-b86b3myzge/models?tags=%E8%A7%86%E8%A7%89
+# Free model
+# API endpoint
+https://api.siliconflow.cn/v1/chat/completions
+model: GLM-4.1V-9B-Thinking
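
A minimal sanity check of the endpoint above, as a sketch: it assumes the standard OpenAI-compatible chat completions schema, and YOUR_API_KEY is a placeholder, not a real key.

import requests

resp = requests.post(
    "https://api.siliconflow.cn/v1/chat/completions",
    headers={"Authorization": "Bearer YOUR_API_KEY"},  # placeholder key
    json={
        "model": "GLM-4.1V-9B-Thinking",
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
# Standard OpenAI-style response shape: choices[0].message.content
print(resp.json()["choices"][0]["message"]["content"])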

@@ -6,17 +6,17 @@ from lightrag.utils import EmbeddingFunc
 async def main():
     # Initialize RAGAnything
     rag = RAGAnything(
+        working_dir="./rag_storage",
         llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-            "gpt-4o-mini",
+            "deepseek-chat",
             prompt,
             system_prompt=system_prompt,
             history_messages=history_messages,
-            api_key="your-api-key",
+            api_key="sk-44ae895eeb614aa1a9c6460579e322f1",
+            base_url="https://api.deepseek.com",
             **kwargs,
         ),
         vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
-            "gpt-4o",
+            "GLM-4.1V-9B-Thinking",
             "",
             system_prompt=None,
             history_messages=[],
@@ -27,14 +27,16 @@ async def main():
                     {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}}
                 ]} if image_data else {"role": "user", "content": prompt}
             ],
-            api_key="your-api-key",
+            api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
+            base_url="https://api.siliconflow.cn/v1",
             **kwargs,
         ) if image_data else openai_complete_if_cache(
-            "gpt-4o-mini",
+            "GLM-4.1V-9B-Thinking",
             prompt,
             system_prompt=system_prompt,
             history_messages=history_messages,
-            api_key="your-api-key",
+            api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
+            base_url="https://api.siliconflow.cn/v1",
             **kwargs,
         ),
         embedding_func=EmbeddingFunc(
@@ -42,16 +44,16 @@ async def main():
             max_token_size=8192,
             func=lambda texts: openai_embed(
                 texts,
-                model="text-embedding-3-large",
-                api_key=api_key,
-                base_url=base_url,
+                model="BAAI/bge-m3",
+                api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
+                base_url="https://api.siliconflow.cn/v1",
             ),
         ),
     )

     # Process a document
     await rag.process_document_complete(
-        file_path="path/to/your/document.pdf",
+        file_path="../Txt/黄琬乔2023蓝桥杯省赛准考证.pdf",
         output_dir="./output",
         parse_method="auto"
     )
@@ -59,22 +61,22 @@ async def main():
     # Query the processed content
     # Pure text query - for basic knowledge base search
     text_result = await rag.aquery(
-        "What are the main findings shown in the figures and tables?",
+        "What does this document say?",
         mode="hybrid"
     )
     print("Text query result:", text_result)

-    # Multimodal query with specific multimodal content
-    multimodal_result = await rag.aquery_with_multimodal(
-        "Explain this formula and its relevance to the document content",
-        multimodal_content=[{
-            "type": "equation",
-            "latex": "P(d|q) = \\frac{P(q|d) \\cdot P(d)}{P(q)}",
-            "equation_caption": "Document relevance probability"
-        }],
-        mode="hybrid"
-    )
-    print("Multimodal query result:", multimodal_result)
+    # # Multimodal query with specific multimodal content
+    # multimodal_result = await rag.aquery_with_multimodal(
+    #     "Explain this formula and its relevance to the document content",
+    #     multimodal_content=[{
+    #         "type": "equation",
+    #         "latex": "P(d|q) = \\frac{P(q|d) \\cdot P(d)}{P(q)}",
+    #         "equation_caption": "Document relevance probability"
+    #     }],
+    #     mode="hybrid"
+    # )
+    # print("Multimodal query result:", multimodal_result)

 if __name__ == "__main__":
     asyncio.run(main())
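
Note that vision_model_func above splices image_data into a data:image/jpeg;base64,... URL, so callers must pass a bare base64 string. A minimal sketch of producing one (load_image_base64 is a hypothetical helper, not part of RAGAnything):

import base64

def load_image_base64(path: str) -> str:
    # Hypothetical helper: return the image file's bytes as a base64 string,
    # matching the f"data:image/jpeg;base64,{image_data}" URL built above.
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")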

@@ -112,7 +112,7 @@ class MineruParser:
             cmd.extend(["-d", device])

         try:
-            print("Conversion command: " + cmd)
+            # print("Conversion command: " + cmd)
             result = subprocess.run(
                 cmd,
                 capture_output=True,
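
Commenting out the print also removes a latent bug: cmd is a list, so "..." + cmd would raise TypeError before subprocess.run ever ran. If the debug output is wanted again, a working variant could be (a sketch, assuming cmd is a list of strings):

# Join the argument list before printing; str + list raises TypeError.
print("Conversion command: " + " ".join(cmd))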
