"""
Offline RAG Indexer module.

Provides complete offline index-building functionality, including:

- Document loading (PDF, Word, TXT, etc.)
- Text splitting (recursive, semantic, parent-child chunks)
- Vector embedding (with llama.cpp support)
- Vector storage (Qdrant)
- Parent document storage (PostgreSQL)

Example usage:

    >>> from rag_indexer import IndexBuilder, SplitterType
    >>>
    >>> builder = IndexBuilder(
    ...     collection_name="my_docs",
    ...     splitter_type=SplitterType.PARENT_CHILD,
    ...     qdrant_url="http://localhost:6333"
    ... )
    >>>
    >>> builder.build_from_file("document.pdf")
"""

# NOTE: import order is kept exactly as in the original module; the
# submodules may depend on each other being loaded in this sequence.
from .loaders import DocumentLoader
from .splitters import (
    SplitterType,
    get_splitter,
    ParentChildSplitter,
)
from .embedders import LlamaCppEmbedder
from .vector_store import QdrantVectorStore
from .builder import IndexBuilder

# Export storage-related classes (from the new ``store`` package).
from .store import (
    PostgresDocStore,
    create_docstore,
)

# Package version string.
__version__ = "2.0.0"

# Public API of the package: the names re-exported by
# ``from rag_indexer import *``.
__all__ = [
    # Core classes
    "DocumentLoader",
    "IndexBuilder",

    # Splitting
    "SplitterType",
    "get_splitter",
    "ParentChildSplitter",

    # Embedding and vector storage
    "LlamaCppEmbedder",
    "QdrantVectorStore",

    # Storage (new ``store`` package)
    "PostgresDocStore",
    "create_docstore",
]