RAG数据库生成

This commit is contained in:
2026-04-19 15:01:40 +08:00
parent c18e8a9860
commit cc8ef41ef9
17 changed files with 1089 additions and 577 deletions

View File

@@ -1,25 +1,60 @@
"""
Offline RAG Indexer module.

Provides the complete offline index-building pipeline, including:
- Document loading (PDF, Word, TXT, etc.)
- Text splitting (recursive, semantic, parent-child chunks)
- Vector embedding (llama.cpp supported)
- Vector storage (Qdrant)
- Parent document storage (PostgreSQL)

Example usage:
>>> from rag_indexer import IndexBuilder, SplitterType
>>>
>>> builder = IndexBuilder(
... collection_name="my_docs",
... splitter_type=SplitterType.PARENT_CHILD,
... qdrant_url="http://localhost:6333"
... )
>>>
>>> builder.build_from_file("document.pdf")
"""
from .loaders import DocumentLoader
from .splitters import (
RecursiveSplitter,
SemanticSplitter,
ParentChildSplitter,
SplitterType,
get_splitter,
ParentChildSplitter,
)
from .embedders import LlamaCppEmbedder
from .vector_store import QdrantVectorStore
from .builder import IndexBuilder
# 导出存储相关类(从新的 store 包)
from .store import (
PostgresDocStore,
create_docstore,
)
# Package version string.
__version__ = "2.0.0"

# Public API of the package: the names exported by a star-import.
# Each name is listed exactly once, inside a single list literal, and every
# entry corresponds to a name imported at the top of this module.
__all__ = [
    # Core classes
    "DocumentLoader",
    "IndexBuilder",
    # Splitting
    "RecursiveSplitter",
    "SemanticSplitter",
    "ParentChildSplitter",
    "SplitterType",
    "get_splitter",
    # Embedding and vector storage
    "LlamaCppEmbedder",
    "QdrantVectorStore",
    # Document storage (from the store package)
    "PostgresDocStore",
    "create_docstore",
]