向量数据库
Some checks failed
构建并部署 AI Agent 服务 / deploy (push) Failing after 32m6s

This commit is contained in:
2026-04-18 16:56:23 +08:00
parent 0470afce13
commit c18e8a9860
11 changed files with 1121 additions and 0 deletions

View File

@@ -0,0 +1,124 @@
"""
Example demonstrating ParentDocumentRetriever usage.
This script shows how to:
1. Build an index with parent-child chunking
2. Search with child chunks (fast, precise)
3. Search with parent context (large context)
4. Access the retriever directly for advanced use cases
"""
import logging
from pathlib import Path
# Root logger setup: emit INFO and above, with each record showing the
# timestamp, logger name, and level ahead of the message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
from builder import IndexBuilder
from splitters import SplitterType
def main():
    """Run the ParentDocumentRetriever walkthrough from start to finish.

    The demo creates an ``IndexBuilder`` configured for parent-child
    splitting, writes a small sample document to ``./test_document.txt``,
    indexes it, runs a child-chunk search and a parent-context search for the
    same query, prints a size comparison plus retriever and collection
    details, and finally closes the builder.

    The builder is closed in a ``finally`` clause, so it is released even if
    one of the demo steps raises; the exception then propagates to the caller.
    The sample document is left on disk after the run.

    Returns:
        The ``IndexBuilder`` used for the demo. ``close()`` has already been
        called on it by the time it is returned.
    """
    banner = "=" * 70
    query = "ParentDocumentRetriever"

    print(banner)
    print("ParentDocumentRetriever Example")
    print(banner)

    # Step 1: Create IndexBuilder with parent-child splitting
    print("\n1. Creating IndexBuilder with parent-child splitting...")
    builder = IndexBuilder(
        collection_name="parent_child_demo",
        splitter_type=SplitterType.PARENT_CHILD,
        parent_chunk_size=1000,  # Parent chunks: larger context
        child_chunk_size=200,  # Child chunks: smaller for precision
        docstore_path="./my_parent_docs",  # Where to store parent chunks
        search_k=5,  # Number of child chunks to retrieve
    )

    try:
        print(f" Parent splitter: chunk_size={builder.get_parent_splitter().chunk_size}")
        print(f" Child splitter: chunk_size={builder.get_child_splitter().chunk_size}")
        print(f" Docstore path: {builder.get_docstore_path()}")
        print(f" Search k: {builder.retriever.search_kwargs['k']}")

        # Step 2: Build index from a sample file
        print("\n2. Building index from sample file...")
        # The literal's lines are deliberately flush-left so that no source
        # indentation ends up inside the generated document.
        test_content = """
This is a test document for demonstrating ParentDocumentRetriever.
Parent chunks contain larger portions of text (1000 characters),
while child chunks are smaller (200 characters) for precise retrieval.
When you search with ParentDocumentRetriever:
- It first retrieves relevant child chunks
- Then replaces them with their corresponding parent chunks
- This gives you large context while maintaining precision
Example search queries:
- "ParentDocumentRetriever"
- "child chunks"
- "large context"
- "precise retrieval"
"""
        test_file = Path("./test_document.txt")
        # Explicit encoding: do not depend on the platform's default codec.
        test_file.write_text(test_content, encoding="utf-8")
        chunk_count = builder.build_from_file(str(test_file))
        print(f" Indexed {chunk_count} documents")

        # Step 3: Search with child chunks (fast, precise)
        print("\n3. Searching with child chunks (fast, precise)...")
        child_results = builder.search(query, k=3)
        print(f" Found {len(child_results)} child chunks:")
        for i, doc in enumerate(child_results, 1):
            print(f" [{i}] {doc.page_content[:100]}...")

        # Step 4: Search with parent context (large context)
        print("\n4. Searching with parent context (large context)...")
        parent_results = builder.search_with_parent_context(query, k=3)
        print(f" Found {len(parent_results)} parent chunks:")
        for i, doc in enumerate(parent_results, 1):
            print(f" [{i}] {doc.page_content[:150]}...")

        # Step 5: Compare results
        print("\n5. Comparing child vs parent results...")
        child_total = sum(len(d.page_content) for d in child_results)
        parent_total = sum(len(d.page_content) for d in parent_results)
        print(f" Child chunks total length: {child_total}")
        print(f" Parent chunks total length: {parent_total}")
        # max(..., 1) guards the division when the child search returns nothing.
        print(f" Ratio: parent/child = {parent_total / max(child_total, 1):.2f}x larger")

        # Step 6: Access retriever directly
        print("\n6. Accessing retriever directly...")
        retriever = builder.get_retriever()
        print(f" Retriever type: {type(retriever).__name__}")
        print(f" Vectorstore: {retriever.vectorstore}")
        print(f" Docstore: {retriever.docstore}")

        # Step 7: Unified retrieval interface
        print("\n7. Using unified retrieval interface...")
        unified_results = builder.retrieve(query, return_parent=True)
        print(f" Retrieved {len(unified_results)} documents (with parent context)")

        # Step 8: Collection info
        print("\n8. Collection info...")
        info = builder.get_collection_info()
        print(f" Collection: {info['name']}")
        print(f" Vectors: {info['vectors_count']}")
        print(f" Vector size: {info['vector_size']}")
    finally:
        # Cleanup runs on success and on failure so the builder is always closed.
        print("\n9. Cleaning up...")
        builder.close()

    print("\n" + banner)
    print("Example completed successfully!")
    print(banner)
    return builder
# Run the demo only when this file is executed as a script, not on import.
# The value returned by main() is bound to the module-level name ``builder``.
if __name__ == "__main__":
    builder = main()