""" Offline RAG Indexer module. 提供完整的离线索引构建功能,包括: - 文档加载(PDF、Word、TXT 等) - 文本切分(递归、语义、父子块) - 向量嵌入(支持 llama.cpp) - 向量存储(Qdrant) - 父文档存储(PostgreSQL) 示例用法: >>> from rag_indexer import IndexBuilder, IndexBuilderConfig, SplitterType >>> >>> config = IndexBuilderConfig( ... collection_name="my_docs", ... splitter_type=SplitterType.PARENT_CHILD, ... ) >>> builder = IndexBuilder(config) >>> >>> # 或直接传参(向后兼容) >>> builder = IndexBuilder(collection_name="my_docs") >>> >>> await builder.build_from_file("document.pdf") """ from .index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig from .loaders import DocumentLoader from .splitters import SplitterType, get_splitter from .config import ( QDRANT_URL, QDRANT_API_KEY, LLAMACPP_EMBEDDING_URL, LLAMACPP_API_KEY, DB_URI, DOCSTORE_URI, RAG_OCR_LANGUAGES, RAG_DOC_LANGUAGES, ) # 从 rag_core 重新导出常用组件 import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "backend")) from backend.rag_core import ( LlamaCppEmbedder, QdrantVectorStore, PostgresDocStore, create_docstore, ) __version__ = "2.0.0" __all__ = [ # 核心构建器与配置 "IndexBuilder", "IndexBuilderConfig", "DocstoreConfig", # 加载器 "DocumentLoader", # 切分相关 "SplitterType", "get_splitter", # 配置 "QDRANT_URL", "QDRANT_API_KEY", "LLAMACPP_EMBEDDING_URL", "LLAMACPP_API_KEY", "DB_URI", "DOCSTORE_URI", "RAG_OCR_LANGUAGES", "RAG_DOC_LANGUAGES", # 嵌入与向量存储 "LlamaCppEmbedder", "QdrantVectorStore", # 文档存储 "PostgresDocStore", "create_docstore", ]