This commit is contained in:
80
rag_indexer/test/reset_index.py
Normal file
80
rag_indexer/test/reset_index.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Clear RAG index data.

Usage:
    python reset_index.py             # clear everything
    python reset_index.py --qdrant    # clear Qdrant only
    python reset_index.py --postgres  # clear PostgreSQL only
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import argparse
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# Qdrant endpoint and optional API key, taken from the environment; the URL
# falls back to a local instance when QDRANT_URL is unset.
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

# PostgreSQL DSN. The fallback deliberately carries no password and points at
# localhost: real credentials must be supplied through DB_URI (environment or
# .env) so that no secret lives in source control and this destructive script
# never targets a remote database by default.
DB_URI = os.getenv(
    "DB_URI",
    "postgresql://postgres@127.0.0.1:5432/langgraph_db?sslmode=disable",
)

# Names of the Qdrant collection and PostgreSQL table that get reset.
COLLECTION_NAME = "rag_documents"
TABLE_NAME = "parent_documents"
|
||||
|
||||
|
||||
def clear_qdrant():
    """Delete the RAG collection from Qdrant if it exists.

    Connects to ``QDRANT_URL`` (passing ``QDRANT_API_KEY`` as the API key),
    deletes ``COLLECTION_NAME`` when it is listed among the server's
    collections, and prints which of the two outcomes happened. The client
    is closed on every path, including when a request raises.
    """
    # Local import: qdrant_client is only loaded when this function runs.
    from qdrant_client import QdrantClient

    print("清理 Qdrant...")
    client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

    try:
        collections = client.get_collections().collections
        if any(c.name == COLLECTION_NAME for c in collections):
            client.delete_collection(COLLECTION_NAME)
            print(f" 集合 '{COLLECTION_NAME}' 已删除")
        else:
            print(f" 集合 '{COLLECTION_NAME}' 不存在")
    finally:
        # Release the underlying HTTP/gRPC connections explicitly.
        client.close()
|
||||
|
||||
|
||||
async def clear_postgres():
    """Delete every row from ``TABLE_NAME`` in PostgreSQL.

    Connects with ``DB_URI``, checks ``information_schema.tables`` for the
    table in the ``public`` schema, and either deletes all of its rows or
    reports that the table does not exist. The connection is closed on
    every path, including when a query raises.
    """
    # Local import: asyncpg is only loaded when this function runs.
    import asyncpg

    print("清理 PostgreSQL...")
    conn = await asyncpg.connect(dsn=DB_URI)

    try:
        exists = await conn.fetchval(
            "SELECT EXISTS(SELECT 1 FROM information_schema.tables "
            "WHERE table_schema = 'public' AND table_name = $1)",
            TABLE_NAME,
        )
        if not exists:
            print(f" 表 '{TABLE_NAME}' 不存在")
            return

        # execute() returns the command status (e.g. "DELETE 42"); taking the
        # row count from it reports what was actually removed, instead of a
        # separate COUNT(*) that could be stale by the time DELETE runs.
        status = await conn.execute(f"DELETE FROM {TABLE_NAME}")
        count = int(status.rsplit(" ", 1)[-1])
        print(f" 表 '{TABLE_NAME}' 已清空,删除 {count} 条记录")
    finally:
        await conn.close()
|
||||
|
||||
|
||||
async def main():
    """Parse the command line and reset the selected stores.

    ``--qdrant`` and ``--postgres`` each select one store; when neither flag
    is given, both stores are reset. Qdrant is handled first, then
    PostgreSQL, followed by a hint on how to rebuild the index.
    """
    cli = argparse.ArgumentParser(description="清理 RAG 索引数据")
    cli.add_argument("--qdrant", action="store_true", help="仅清理 Qdrant")
    cli.add_argument("--postgres", action="store_true", help="仅清理 PostgreSQL")
    opts = cli.parse_args()

    # No explicit selection means "reset everything".
    reset_all = not (opts.qdrant or opts.postgres)

    if reset_all or opts.qdrant:
        clear_qdrant()

    if reset_all or opts.postgres:
        await clear_postgres()

    print("\n完成。运行 `python -m rag_indexer.cli` 重建索引")


# Script entry point: run the async main() to completion.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user