Files
ailine/rag_indexer/__init__.py

61 lines
1.3 KiB
Python
Raw Normal View History

"""
Offline RAG Indexer module.

Provides complete offline index-building functionality, including:
- Document loading (PDF, Word, TXT)
- Text splitting (recursive, semantic, parent-child chunks)
- Vector embedding (llama.cpp supported)
- Vector storage (Qdrant)
- Parent document storage (PostgreSQL)

Example usage:
    >>> from rag_indexer import IndexBuilder, SplitterType
    >>>
    >>> builder = IndexBuilder(
    ...     collection_name="my_docs",
    ...     splitter_type=SplitterType.PARENT_CHILD,
    ...     qdrant_url="http://localhost:6333"
    ... )
    >>>
    >>> builder.build_from_file("document.pdf")
"""
# Re-export the package's public classes and helpers from their submodules.
# The original import order is kept on purpose, in case any submodule
# depends on another having been imported first.
from .loaders import DocumentLoader
from .splitters import (
    SplitterType,
    get_splitter,
    ParentChildSplitter,
)
from .embedders import LlamaCppEmbedder
from .vector_store import QdrantVectorStore
from .builder import IndexBuilder

# Export the storage-related classes (from the new ``store`` package).
from .store import (
    PostgresDocStore,
    create_docstore,
)
__version__ = "2.0.0"

# Names exported by a star-import of this package. Every entry must match a
# name imported at the top of this module.
__all__ = [
    # Core classes
    "DocumentLoader",
    "IndexBuilder",
    # Splitting
    "SplitterType",
    "get_splitter",
    "ParentChildSplitter",
    # Embedding and vector storage
    "LlamaCppEmbedder",
    "QdrantVectorStore",
    # Storage (new ``store`` package)
    "PostgresDocStore",
    "create_docstore",
]