Files
ailine/rag_indexer/clear_qdrant.py

81 lines
2.4 KiB
Python
Raw Normal View History

2026-04-19 22:01:55 +08:00
"""清理 RAG 索引数据。
用法:
python reset_index.py # 清理全部
python reset_index.py --qdrant # 仅清理 Qdrant
python reset_index.py --postgres # 仅清理 PostgreSQL
"""
import asyncio
import os
import argparse
from dotenv import load_dotenv
load_dotenv()
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
DB_URI = os.getenv("DB_URI", "postgresql://postgres:huang1998@115.190.121.151:5432/langgraph_db?sslmode=disable")
COLLECTION_NAME = "rag_documents"
TABLE_NAME = "parent_documents"
def clear_qdrant():
"""删除 Qdrant 集合。"""
from qdrant_client import QdrantClient
print("清理 Qdrant...")
client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)
collections = client.get_collections().collections
if any(c.name == COLLECTION_NAME for c in collections):
client.delete_collection(COLLECTION_NAME)
print(f" 集合 '{COLLECTION_NAME}' 已删除")
else:
print(f" 集合 '{COLLECTION_NAME}' 不存在")
async def clear_postgres():
"""清空 PostgreSQL 表数据。"""
import asyncpg
print("清理 PostgreSQL...")
conn = await asyncpg.connect(dsn=DB_URI)
try:
exists = await conn.fetchval(
"SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = $1)",
TABLE_NAME
)
if exists:
count = await conn.fetchval(f"SELECT COUNT(*) FROM {TABLE_NAME}")
await conn.execute(f"DELETE FROM {TABLE_NAME}")
print(f"'{TABLE_NAME}' 已清空,删除 {count} 条记录")
else:
print(f"'{TABLE_NAME}' 不存在")
finally:
await conn.close()
async def main():
parser = argparse.ArgumentParser(description="清理 RAG 索引数据")
parser.add_argument("--qdrant", action="store_true", help="仅清理 Qdrant")
parser.add_argument("--postgres", action="store_true", help="仅清理 PostgreSQL")
args = parser.parse_args()
if not args.qdrant and not args.postgres:
args.qdrant = True
args.postgres = True
if args.qdrant:
clear_qdrant()
if args.postgres:
await clear_postgres()
print("\n完成。运行 `python -m rag_indexer.cli` 重建索引")
if __name__ == "__main__":
asyncio.run(main())