This commit is contained in:
@@ -23,9 +23,9 @@ Offline RAG Indexer module.
|
||||
>>> await builder.build_from_file("document.pdf")
|
||||
"""
|
||||
|
||||
from rag_indexer.index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig
|
||||
from rag_indexer.loaders import DocumentLoader
|
||||
from rag_indexer.splitters import SplitterType, get_splitter
|
||||
from .index_builder import IndexBuilder, IndexBuilderConfig, DocstoreConfig
|
||||
from .loaders import DocumentLoader
|
||||
from .splitters import SplitterType, get_splitter
|
||||
|
||||
# 从 rag_core 重新导出常用组件
|
||||
from rag_core import (
|
||||
|
||||
@@ -7,8 +7,12 @@ import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from rag_indexer.index_builder import IndexBuilder, IndexBuilderConfig
|
||||
from rag_indexer.splitters import SplitterType
|
||||
# 添加项目根目录和 backend 目录到 Python 路径
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
|
||||
|
||||
from .index_builder import IndexBuilder, IndexBuilderConfig
|
||||
from .splitters import SplitterType
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
|
||||
@@ -6,10 +6,14 @@
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Union, Optional, Any, Dict
|
||||
|
||||
# 添加 backend 目录到路径以导入 rag_core
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
|
||||
|
||||
from httpx import RemoteProtocolError
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
@@ -17,8 +21,8 @@ from langchain_core.stores import BaseStore
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
|
||||
from qdrant_client.http.exceptions import ResponseHandlingException
|
||||
|
||||
from rag_indexer.loaders import DocumentLoader
|
||||
from rag_indexer.splitters import SplitterType, get_splitter
|
||||
from .loaders import DocumentLoader
|
||||
from .splitters import SplitterType, get_splitter
|
||||
from rag_core import LlamaCppEmbedder, QdrantVectorStore, create_docstore, create_parent_retriever
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
rag_indexer/requirements.txt
Normal file (new, 33 lines)
@@ -0,0 +1,33 @@
|
||||
# RAG Indexer - 本地索引工具依赖
|
||||
# 依赖 rag_core (从 ../backend/rag_core 导入)
|
||||
|
||||
# Core
|
||||
pydantic==2.12.5
|
||||
python-dotenv==1.2.2
|
||||
typing-extensions==4.15.0
|
||||
|
||||
# LangChain (用于文档处理)
|
||||
langchain==1.2.15
|
||||
langchain-community==0.4.1
|
||||
langchain-core==1.2.28
|
||||
tiktoken>=0.12.0
|
||||
|
||||
# Vector DB
|
||||
qdrant-client==1.17.1
|
||||
|
||||
# HTTP
|
||||
httpx==0.28.1
|
||||
|
||||
# Utilities
|
||||
tenacity==9.1.4
|
||||
rich==15.0.0
|
||||
PyYAML==6.0.3
|
||||
numpy>=1.26.2
|
||||
|
||||
# Document Processing
|
||||
unstructured==0.22.21
|
||||
pypdf==6.10.0
|
||||
beautifulsoup4==4.14.3
|
||||
lxml==6.1.0
|
||||
pandas==3.0.2
|
||||
spacy==3.8.14
|
||||
@@ -5,12 +5,10 @@ import sys
|
||||
import numpy as np
|
||||
from dotenv import load_dotenv
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
from backend.rag_core import LlamaCppEmbedder
|
||||
load_dotenv()
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../.."))
|
||||
from rag_core import LlamaCppEmbedder
|
||||
|
||||
load_dotenv()
|
||||
|
||||
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
|
||||
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
||||
|
||||
@@ -10,8 +10,8 @@ import sys
|
||||
# 添加项目根目录到 Python 路径
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
|
||||
from rag_indexer.index_builder import IndexBuilder
|
||||
from rag_indexer.splitters import SplitterType
|
||||
from ..index_builder import IndexBuilder
|
||||
from ..splitters import SplitterType
|
||||
|
||||
async def test_index_builder():
|
||||
"""测试索引构建功能"""
|
||||
|
||||
Reference in New Issue
Block a user