60 lines
1.4 KiB
Python
60 lines
1.4 KiB
Python
"""
|
||
Offline RAG Indexer module.
|
||
|
||
提供完整的离线索引构建功能,包括:
|
||
- 文档加载(PDF、Word、TXT 等)
|
||
- 文本切分(递归、语义、父子块)
|
||
- 向量嵌入(支持 llama.cpp)
|
||
- 向量存储(Qdrant)
|
||
- 父文档存储(PostgreSQL)
|
||
|
||
示例用法:
|
||
>>> from rag_indexer import IndexBuilder, IndexBuilderConfig, SplitterType
|
||
>>>
|
||
>>> config = IndexBuilderConfig(
|
||
... collection_name="my_docs",
|
||
... splitter_type=SplitterType.PARENT_CHILD,
|
||
... )
|
||
>>> builder = IndexBuilder(config)
|
||
>>>
|
||
>>> # 或直接传参(向后兼容)
|
||
>>> builder = IndexBuilder(collection_name="my_docs")
|
||
>>>
|
||
>>> await builder.build_from_file("document.pdf")
|
||
"""
|
||
|
||
from rag_indexer.index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig
|
||
from rag_indexer.loaders import DocumentLoader
|
||
from rag_indexer.splitters import SplitterType, get_splitter
|
||
|
||
# 从 rag_core 重新导出常用组件
|
||
from rag_core import (
|
||
LlamaCppEmbedder,
|
||
QdrantVectorStore,
|
||
PostgresDocStore,
|
||
create_docstore,
|
||
)
|
||
|
||
# Package version string.
__version__ = "2.0.0"

# Public API of the package: the names bound by ``from rag_indexer import *``.
# Every entry must match a name imported at module level in this file.
__all__ = [
    # Core builder and configuration
    "IndexBuilder",
    "IndexBuilderConfig",
    "DocstoreConfig",

    # Loaders
    "DocumentLoader",

    # Splitting
    "SplitterType",
    "get_splitter",

    # Embedding and vector storage
    "LlamaCppEmbedder",
    "QdrantVectorStore",

    # Document storage
    "PostgresDocStore",
    "create_docstore",
]