"""
RAG retrieval and generation module.

Provides online retrieval and generation features, including:
- Basic vector retrieval (dense vectors / hybrid retrieval)
- Reranking (cross-encoder)
- Multi-query rewriting (Multi-Query)
- RRF fusion (Reciprocal Rank Fusion)
- A complete RAG pipeline
- Agent tool wrappers

Fixed pipeline:
user query -> multi-query rewriting -> parallel retrieval -> RRF fusion
-> reranking -> return parent documents

Example usage:
    >>> from app.rag import RAGPipeline, create_rag_tool
    >>> from rag_indexer.builder import IndexBuilder, IndexBuilderConfig
    >>> from langchain_openai import ChatOpenAI
    >>>
    >>> # Obtain a base retriever (e.g. a parent-child chunk retriever)
    >>> config = IndexBuilderConfig(collection_name="my_docs")
    >>> builder = IndexBuilder(config)
    >>> retriever = builder.retriever
    >>>
    >>> # Create the LLM and the pipeline
    >>> llm = ChatOpenAI(model="gpt-3.5-turbo")
    >>> pipeline = RAGPipeline(retriever=retriever, llm=llm)
    >>>
    >>> # Retrieve (``await`` must run inside a coroutine or an async REPL)
    >>> docs = await pipeline.aretrieve("什么是 RAG?")
    >>> context = pipeline.format_context(docs)
    >>>
    >>> # Create the Agent tool
    >>> rag_tool = create_rag_tool(retriever=retriever, llm=llm)
"""

from .retriever import (
    create_base_retriever,
    create_hybrid_retriever,
    create_qdrant_client,
)
from .reranker import LLaMaCPPReranker
from .query_transform import MultiQueryGenerator
from .fusion import reciprocal_rank_fusion
from .pipeline import RAGPipeline
from .tools import create_rag_tool, create_rag_tool_sync

# Public API of the package; names are grouped by the submodule they come from.
__all__ = [
    # Retriever factory functions
    "create_base_retriever",
    "create_hybrid_retriever",
    "create_qdrant_client",

    # Reranker
    "LLaMaCPPReranker",

    # Query-rewriting generator
    "MultiQueryGenerator",

    # Fusion algorithm
    "reciprocal_rank_fusion",

    # Main pipeline
    "RAGPipeline",

    # Tool creation (for use by an Agent)
    "create_rag_tool",
    "create_rag_tool_sync",
]