""" Document store manager for ParentDocumentRetriever. Supports both LocalFileStore (default) and custom PostgreSQL-backed stores. """ import os from typing import Optional from langchain.storage import BaseStore, LocalFileStore def get_docstore(persist_path: str = None) -> LocalFileStore: """ Create and return a document store for parent chunks. Args: persist_path: Path to store parent documents. Defaults to ./parent_docs or HERMES_HOME/parent_docs if set. """ if persist_path is None: # Use HERMES_HOME if available, otherwise default to current directory persist_path = os.getenv("HERMES_HOME") if persist_path: persist_path = os.path.join(persist_path, "parent_docs") else: persist_path = "./parent_docs" os.makedirs(persist_path, exist_ok=True) return LocalFileStore(persist_path) class PostgresDocStore(BaseStore): """ PostgreSQL-backed document store for parent chunks. This is an optional advanced feature. For most use cases, LocalFileStore is sufficient and simpler. """ def __init__(self, connection_string: str): """ Initialize PostgreSQL document store. 
Args: connection_string: PostgreSQL connection URL """ import psycopg2 from psycopg2 import sql self.conn_string = connection_string self._conn = None # Create table if not exists self._create_table() def _create_table(self): """Create the parent documents table if not exists.""" try: self._conn = psycopg2.connect(self.conn_string) cursor = self._conn.cursor() cursor.execute(""" CREATE TABLE IF NOT EXISTS parent_documents ( key TEXT PRIMARY KEY, value JSONB NOT NULL, created_at TIMESTAMPTZ DEFAULT NOW() ) """) self._conn.commit() cursor.close() except Exception as e: raise RuntimeError(f"Failed to create PostgreSQL table: {e}") def get(self, key: str) -> Optional[dict]: """Retrieve a document by key.""" try: self._ensure_connection() cursor = self._conn.cursor() cursor.execute("SELECT value FROM parent_documents WHERE key = %s", (key,)) row = cursor.fetchone() cursor.close() if row: import json return json.loads(row[0]) return None except Exception as e: raise RuntimeError(f"Failed to retrieve document: {e}") def set(self, key: str, value: dict) -> None: """Store a document.""" try: self._ensure_connection() cursor = self._conn.cursor() # Upsert insert_query = sql.SQL( "INSERT INTO parent_documents (key, value) VALUES (%s, %s)" ) update_query = sql.SQL( "UPDATE parent_documents SET value = %s WHERE key = %s" ) cursor.execute(insert_query, (key, json.dumps(value))) try: cursor.execute(update_query, (key, json.dumps(value))) except psycopg2.IntegrityError: pass # Key exists, ignore self._conn.commit() cursor.close() except Exception as e: raise RuntimeError(f"Failed to store document: {e}") def _ensure_connection(self): """Ensure we have an open connection.""" if self._conn is None or self._conn.closed: self._conn = psycopg2.connect(self.conn_string) def close(self): """Close the connection.""" if self._conn and not self._conn.closed: self._conn.close() # Factory function for creating custom docstores # Returns a tuple: (BaseStore instance, connection_string or None) 
def create_docstore(
    store_type: str = "local",
    persist_path: str = None,
    connection_string: str = None
) -> tuple:
    """
    Build a document store of the requested kind.

    A PostgreSQL store is created only when ``store_type`` is "postgres"
    and a non-empty ``connection_string`` is supplied. Every other
    combination yields the local file store.

    Args:
        store_type: "local" (default), "postgres"
        persist_path: Path for local file store
        connection_string: PostgreSQL connection string

    Returns:
        Tuple of (BaseStore instance, connection_string or None)
    """
    use_postgres = store_type == "postgres" and bool(connection_string)

    # Guard clause: anything that is not a fully specified postgres request
    # is served by the local file store.
    if not use_postgres:
        local_store = get_docstore(persist_path)
        return (local_store, None)

    postgres_store = PostgresDocStore(connection_string)
    return (postgres_store, connection_string)