Files
ailine/rag_indexer/__init__.py
2026-04-19 15:01:40 +08:00

61 lines
1.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Offline RAG Indexer module.

Provides a complete offline index-building pipeline, including:

- Document loading (PDF, Word, TXT, etc.)
- Text splitting (recursive, semantic, parent-child chunks)
- Vector embedding (llama.cpp supported)
- Vector storage (Qdrant)
- Parent document storage (PostgreSQL)

Example usage:

>>> from rag_indexer import IndexBuilder, SplitterType
>>>
>>> builder = IndexBuilder(
...     collection_name="my_docs",
...     splitter_type=SplitterType.PARENT_CHILD,
...     qdrant_url="http://localhost:6333"
... )
>>>
>>> builder.build_from_file("document.pdf")
"""
from .loaders import DocumentLoader
from .splitters import (
SplitterType,
get_splitter,
ParentChildSplitter,
)
from .embedders import LlamaCppEmbedder
from .vector_store import QdrantVectorStore
from .builder import IndexBuilder
# Re-export the storage-related classes (from the new `store` package).
from .store import (
PostgresDocStore,
create_docstore,
)
# Package version string.
__version__ = "2.0.0"

# Public API of the package: the names exposed by a star-import.
# One line per functional group; the order of entries is unchanged.
__all__ = [
    "DocumentLoader", "IndexBuilder",  # core classes
    "SplitterType", "get_splitter", "ParentChildSplitter",  # splitting
    "LlamaCppEmbedder", "QdrantVectorStore",  # embedding and vector storage
    "PostgresDocStore", "create_docstore",  # document storage (`store` package)
]