Files
ailine/rag_indexer/__init__.py
2026-04-21 20:49:10 +08:00

84 lines
1.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Offline RAG Indexer module.

Provides a complete offline index-building pipeline, including:
- Document loading (PDF, Word, TXT, etc.)
- Text splitting (recursive, semantic, parent-child chunks)
- Vector embedding (llama.cpp supported)
- Vector storage (Qdrant)
- Parent-document storage (PostgreSQL)

Example usage:
    >>> from rag_indexer import IndexBuilder, IndexBuilderConfig, SplitterType
    >>>
    >>> config = IndexBuilderConfig(
    ...     collection_name="my_docs",
    ...     splitter_type=SplitterType.PARENT_CHILD,
    ... )
    >>> builder = IndexBuilder(config)
    >>>
    >>> # Or pass the arguments directly (backward compatible)
    >>> builder = IndexBuilder(collection_name="my_docs")
    >>>
    >>> await builder.build_from_file("document.pdf")
"""
from .index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig
from .loaders import DocumentLoader
from .splitters import SplitterType, get_splitter
from .config import (
QDRANT_URL,
QDRANT_API_KEY,
LLAMACPP_EMBEDDING_URL,
LLAMACPP_API_KEY,
DB_URI,
DOCSTORE_URI,
RAG_OCR_LANGUAGES,
RAG_DOC_LANGUAGES,
)
# Re-export commonly used components from rag_core.
import sys
from pathlib import Path
# Prepend "<directory containing this package>/backend" to the module search
# path before importing from backend.rag_core. This runs at import time and
# must stay ahead of the import below.
# NOTE(review): the import uses the dotted name `backend.rag_core`, which is
# resolved through the directory that *contains* `backend`, not through the
# `backend` directory added here. Presumably the parent directory is already
# importable and this entry serves absolute imports inside backend itself;
# confirm against the project layout.
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
from backend.rag_core import (
LlamaCppEmbedder,
QdrantVectorStore,
PostgresDocStore,
create_docstore,
)
# Package version string.
__version__ = "2.0.0"
# Public API of the package: the names exported by `from rag_indexer import *`.
# Every entry is a name imported earlier in this module.
__all__ = [
# Core builder and its configuration
"IndexBuilder",
"IndexBuilderConfig",
"DocstoreConfig",
# Loaders
"DocumentLoader",
# Splitting
"SplitterType",
"get_splitter",
# Configuration constants
"QDRANT_URL",
"QDRANT_API_KEY",
"LLAMACPP_EMBEDDING_URL",
"LLAMACPP_API_KEY",
"DB_URI",
"DOCSTORE_URI",
"RAG_OCR_LANGUAGES",
"RAG_DOC_LANGUAGES",
# Embedding and vector storage
"LlamaCppEmbedder",
"QdrantVectorStore",
# Document storage
"PostgresDocStore",
"create_docstore",
]