Files
ailine/rag_indexer/__init__.py

60 lines
1.4 KiB
Python
Raw Normal View History

2026-04-18 16:56:23 +08:00
"""
Offline RAG Indexer module.
2026-04-19 15:01:40 +08:00
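Provides complete offline index-building functionality, including:
- Document loading (PDF, Word, TXT)
- Text splitting (recursive, semantic, parent-child chunks)
- Vector embedding (llama.cpp supported)
- Vector storage (Qdrant)
- Parent-document storage (PostgreSQL)

Example usage: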
2026-04-19 22:01:55 +08:00
>>> from rag_indexer import IndexBuilder, IndexBuilderConfig, SplitterType
2026-04-19 15:01:40 +08:00
>>>
2026-04-19 22:01:55 +08:00
>>> config = IndexBuilderConfig(
2026-04-19 15:01:40 +08:00
... collection_name="my_docs",
... splitter_type=SplitterType.PARENT_CHILD,
... )
2026-04-19 22:01:55 +08:00
>>> builder = IndexBuilder(config)
2026-04-19 15:01:40 +08:00
>>>
2026-04-19 22:01:55 +08:00
>>> # Or pass arguments directly (backward compatible)
>>> builder = IndexBuilder(collection_name="my_docs")
>>>
>>> await builder.build_from_file("document.pdf")
2026-04-18 16:56:23 +08:00
"""
2026-04-21 10:26:37 +08:00
from .index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig
from .loaders import DocumentLoader
from .splitters import SplitterType, get_splitter
2026-04-18 16:56:23 +08:00
2026-04-19 22:01:55 +08:00
# Re-export commonly used components from rag_core
from rag_core import (
LlamaCppEmbedder,
QdrantVectorStore,
2026-04-19 15:01:40 +08:00
PostgresDocStore,
create_docstore,
)
# Package version string.
__version__ = "2.0.0"

# Public API of this package: the names bound by a star-import of the package.
# Every entry must correspond to a name imported at the top of this module.
__all__ = [
    # Core builder and configuration
    "IndexBuilder",
    # Submodule name; it was listed here previously, so it is kept to stay
    # backward compatible with existing star-imports.
    "index_builder",
    "IndexBuilderConfig",
    "DocstoreConfig",
    # Loaders
    "DocumentLoader",
    # Splitting
    "SplitterType",
    "get_splitter",
    # Embedding and vector storage
    "LlamaCppEmbedder",
    "QdrantVectorStore",
    # Document storage
    "PostgresDocStore",
    "create_docstore",
]