Files
ailine/rag_indexer/__init__.py
root 4209386c77
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 6m22s
refactor: 统一导入方式,移除 sys.path 操作
- 重构所有模块导入,移除 sys.path.insert
- 统一使用 from backend.xxx 的绝对导入方式
- rag_core 包内使用相对导入(from .xxx)
- 移动 visualize_graph.py 到 tools/ 目录
- 添加必要的 __init__.py 文件
- 清理废弃文档和脚本
2026-05-04 12:55:45 +08:00

80 lines
1.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Offline RAG Indexer module.

Provides the complete offline index-building functionality, including:
- Document loading (PDF, Word, TXT, etc.)
- Text splitting (recursive, semantic, parent-child chunks)
- Vector embedding (llama.cpp supported)
- Vector storage (Qdrant)
- Parent-document storage (PostgreSQL)

Example usage:
>>> from rag_indexer import IndexBuilder, IndexBuilderConfig, SplitterType
>>>
>>> config = IndexBuilderConfig(
... collection_name="my_docs",
... splitter_type=SplitterType.PARENT_CHILD,
... )
>>> builder = IndexBuilder(config)
>>>
>>> # Or pass the parameters directly (backward compatible)
>>> builder = IndexBuilder(collection_name="my_docs")
>>>
>>> await builder.build_from_file("document.pdf")
"""
# Re-export the package's own public names from its submodules so callers can
# import them from the package root.
from .index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig
from .loaders import DocumentLoader
from .splitters import SplitterType, get_splitter
# Configuration constants defined in the sibling ``config`` module.
from .config import (
QDRANT_URL,
QDRANT_API_KEY,
LLAMACPP_EMBEDDING_URL,
LLAMACPP_API_KEY,
DB_URI,
DOCSTORE_URI,
RAG_OCR_LANGUAGES,
RAG_DOC_LANGUAGES,
)
# Re-export commonly used components from rag_core
from backend.rag_core import (
LlamaCppEmbedder,
QdrantVectorStore,
PostgresDocStore,
create_docstore,
)
# Package version string.
__version__ = "2.0.0"
# Names exported by ``from <package> import *``; every entry is imported above.
__all__ = [
# Core builder and configuration
"IndexBuilder",
"IndexBuilderConfig",
"DocstoreConfig",
# Loaders
"DocumentLoader",
# Splitting
"SplitterType",
"get_splitter",
# Configuration constants
"QDRANT_URL",
"QDRANT_API_KEY",
"LLAMACPP_EMBEDDING_URL",
"LLAMACPP_API_KEY",
"DB_URI",
"DOCSTORE_URI",
"RAG_OCR_LANGUAGES",
"RAG_DOC_LANGUAGES",
# Embedding and vector storage
"LlamaCppEmbedder",
"QdrantVectorStore",
# Document storage
"PostgresDocStore",
"create_docstore",
]