"""Offline RAG Indexer module.

Provides a complete offline index-building pipeline, including:

- Document loading (PDF, Word, TXT, etc.)
- Text splitting (recursive, semantic, parent-child chunks)
- Vector embedding (llama.cpp supported)
- Vector storage (Qdrant)
- Parent-document storage (PostgreSQL)

Example usage:

>>> from rag_indexer import IndexBuilder, SplitterType
>>>
>>> builder = IndexBuilder(
...     collection_name="my_docs",
...     splitter_type=SplitterType.PARENT_CHILD,
...     qdrant_url="http://localhost:6333"
... )
>>>
>>> builder.build_from_file("document.pdf")
"""

# Re-export the package's building blocks from their submodules so callers
# can import them directly from the package root.
from .loaders import DocumentLoader
from .splitters import (
    SplitterType,
    get_splitter,
    ParentChildSplitter,
)
from .embedders import LlamaCppEmbedder
from .vector_store import QdrantVectorStore
from .builder import IndexBuilder

# Re-export the storage-related classes (from the new `store` package).
from .store import (
    PostgresDocStore,
    create_docstore,
)

# Package version string.
__version__ = "2.0.0"

# Names exported by a star-import of this package; every entry is bound by
# the relative imports at the top of this file.
__all__ = [
    # Core classes
    "DocumentLoader",
    "IndexBuilder",

    # Text splitting
    "SplitterType",
    "get_splitter",
    "ParentChildSplitter",

    # Embedding and vector storage
    "LlamaCppEmbedder",
    "QdrantVectorStore",

    # Storage (new `store` package)
    "PostgresDocStore",
    "create_docstore",
]