refactor!: 完全异步化 RAG 系统,移除 LangChain ParentDocumentRetriever 依赖
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 6m34s
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 6m34s
- 重写 rag_core/vector_store.py:完全异步实现 aadd_documents、asimilarity_search
- 重写 app/rag/retriever.py:异步混合检索,移除同步兼容代码
- 修改 rag_indexer/index_builder.py:全链路异步调用
- 删除 rag_core/retriever_factory.py:不再使用 LangChain ParentDocumentRetriever
- 清理冗余导入和代码:移除 model_services 兼容、不需要的异常导入
- 更新 rag_indexer/README.md:反映新架构

核心改进:
- 完全异步化:索引构建和检索全链路 async/await
- 自定义实现:不再依赖 LangChain 的 ParentDocumentRetriever
- 双向量支持:子文档同时存储 dense + sparse 向量到 Qdrant
- 架构清晰:rag_core 公共组件、rag_indexer 索引、app/rag 检索
This commit is contained in:
@@ -7,7 +7,7 @@ import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from backend.rag_core import QdrantVectorStore
|
||||
from backend.rag_core import QdrantHybridStore
|
||||
from backend.app.model_services import get_embedding_service
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ def check_qdrant_data():
|
||||
print("="*70)
|
||||
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
client = vs.get_qdrant_client()
|
||||
|
||||
# 先获取几个点看看 payload 结构
|
||||
|
||||
@@ -8,7 +8,7 @@ import os
|
||||
import sys
|
||||
|
||||
from qdrant_client import models
|
||||
from backend.rag_core import QdrantVectorStore, get_sparse_embedder
|
||||
from backend.rag_core import QdrantHybridStore, get_sparse_embedder
|
||||
from backend.app.model_services import get_embedding_service
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ def test_dense_retrieval():
|
||||
print("="*70)
|
||||
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
|
||||
query = "黄双银" # 用文档里真正有的名字查询
|
||||
print(f"\n查询: {query}")
|
||||
|
||||
@@ -7,7 +7,7 @@ import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from backend.rag_core import QdrantVectorStore
|
||||
from backend.rag_core import QdrantHybridStore
|
||||
from backend.app.model_services import get_embedding_service
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ async def delete_and_recreate():
|
||||
print("="*70)
|
||||
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
|
||||
# 删除旧集合
|
||||
try:
|
||||
|
||||
@@ -8,7 +8,7 @@ import os
|
||||
import sys
|
||||
|
||||
from qdrant_client import models
|
||||
from backend.rag_core import QdrantVectorStore, get_sparse_embedder
|
||||
from backend.rag_core import QdrantHybridStore, get_sparse_embedder
|
||||
from backend.app.model_services import get_embedding_service
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ def check_qdrant_content():
|
||||
print("="*70)
|
||||
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
client = vs.get_qdrant_client()
|
||||
|
||||
# 滚动获取前 5 个点
|
||||
@@ -51,7 +51,7 @@ def test_dense_retrieval():
|
||||
print("="*70)
|
||||
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
|
||||
query = "蚂蚁" # 用中文查询
|
||||
print(f"\n查询: {query}")
|
||||
@@ -72,7 +72,7 @@ def test_sparse_retrieval():
|
||||
print("="*70)
|
||||
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
client = vs.get_qdrant_client()
|
||||
sparse_embedder = get_sparse_embedder()
|
||||
|
||||
@@ -109,7 +109,7 @@ def test_hybrid_retrieval():
|
||||
print("="*70)
|
||||
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
client = vs.get_qdrant_client()
|
||||
sparse_embedder = get_sparse_embedder()
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ import os
|
||||
from rag_indexer.index_builder import IndexBuilder
|
||||
from rag_indexer.splitters import SplitterType
|
||||
|
||||
from backend.rag_core import QdrantVectorStore, get_sparse_embedder
|
||||
from backend.rag_core import QdrantHybridStore, get_sparse_embedder
|
||||
from backend.app.model_services import get_embedding_service
|
||||
from qdrant_client import models
|
||||
|
||||
@@ -61,7 +61,7 @@ def test_dense_retrieval():
|
||||
embeddings = get_embedding_service()
|
||||
|
||||
# 创建向量存储
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
|
||||
# 测试查询
|
||||
query = "The Ant and the Grasshopper"
|
||||
@@ -87,7 +87,7 @@ def test_sparse_retrieval_simple():
|
||||
|
||||
# 获取嵌入服务和稀疏嵌入器
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
client = vs.get_qdrant_client()
|
||||
sparse_embedder = get_sparse_embedder()
|
||||
|
||||
@@ -133,7 +133,7 @@ def test_hybrid_retrieval_simple():
|
||||
|
||||
# 获取嵌入服务和稀疏嵌入器
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
client = vs.get_qdrant_client()
|
||||
sparse_embedder = get_sparse_embedder()
|
||||
|
||||
@@ -189,7 +189,7 @@ def test_parent_child_retrieval_simple():
|
||||
|
||||
# 获取嵌入服务和稀疏嵌入器
|
||||
embeddings = get_embedding_service()
|
||||
vs = QdrantVectorStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
vs = QdrantHybridStore(collection_name="rag_documents", embeddings=embeddings)
|
||||
client = vs.get_qdrant_client()
|
||||
sparse_embedder = get_sparse_embedder()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user