From 3954bb6579e06ad4ba62761b0ec81c1c0a13fa41 Mon Sep 17 00:00:00 2001 From: wmsnp Date: Tue, 18 Nov 2025 11:45:16 +0800 Subject: [PATCH] feat(postgres_impl): add vchordrq vector index support and unify vector index creation logic (cherry picked from commit d07023c962e40844e9a82efafa308086526ba691) --- config.ini.example | 5 ++++- env.example | 5 ++++- lightrag/kg/postgres_impl.py | 39 ++++++++---------------------------- 3 files changed, 16 insertions(+), 33 deletions(-) diff --git a/config.ini.example b/config.ini.example index d3fff063..9d2f688e 100644 --- a/config.ini.example +++ b/config.ini.example @@ -28,10 +28,13 @@ password = your_password database = your_database # workspace = default max_connections = 12 -vector_index_type = HNSW # HNSW or IVFFLAT +vector_index_type = HNSW # HNSW, IVFFLAT or VCHORDRQ hnsw_m = 16 hnsw_ef = 64 ivfflat_lists = 100 +vchordrq_build_options = +vchordrq_probes = +vchordrq_epsilon = 1.9 [memgraph] uri = bolt://localhost:7687 diff --git a/env.example b/env.example index 73f2d7b7..a0082853 100644 --- a/env.example +++ b/env.example @@ -305,11 +305,14 @@ POSTGRES_MAX_CONNECTIONS=12 # POSTGRES_WORKSPACE=forced_workspace_name ### PostgreSQL Vector Storage Configuration -### Vector storage type: HNSW, IVFFlat +### Vector storage type: HNSW, IVFFlat, VCHORDRQ POSTGRES_VECTOR_INDEX_TYPE=HNSW POSTGRES_HNSW_M=16 POSTGRES_HNSW_EF=200 POSTGRES_IVFFLAT_LISTS=100 +POSTGRES_VCHORDRQ_BUILD_OPTIONS= +POSTGRES_VCHORDRQ_PROBES= +POSTGRES_VCHORDRQ_EPSILON=1.9 ### PostgreSQL Connection Retry Configuration (Network Robustness) ### Number of retry attempts (1-10, default: 3) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index ba5ec6d7..dcd87250 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -413,27 +413,12 @@ class PostgreSQLDB: pass async def configure_vchordrq(self, connection: asyncpg.Connection) -> None: - """Configure VCHORDRQ extension for vector similarity search. - - Raises: - asyncpg.exceptions.UndefinedObjectError: If VCHORDRQ extension is not installed - asyncpg.exceptions.InvalidParameterValueError: If parameter value is invalid - - Note: - This method does not catch exceptions. Configuration errors will fail-fast, - while transient connection errors will be retried by _run_with_retry. - """ - # Handle probes parameter - only set if non-empty value is provided - if self.vchordrq_probes and str(self.vchordrq_probes).strip(): + """Configure VCHORDRQ extension for vector similarity search.""" + try: await connection.execute(f"SET vchordrq.probes TO '{self.vchordrq_probes}'") - logger.debug(f"PostgreSQL, VCHORDRQ probes set to: {self.vchordrq_probes}") - - # Handle epsilon parameter independently - check for None to allow 0.0 as valid value - if self.vchordrq_epsilon is not None: await connection.execute(f"SET vchordrq.epsilon TO {self.vchordrq_epsilon}") - logger.debug( - f"PostgreSQL, VCHORDRQ epsilon set to: {self.vchordrq_epsilon}" - ) + except Exception: + pass async def _migrate_llm_cache_schema(self): """Migrate LLM cache schema: add new columns and remove deprecated mode field""" @@ -1403,14 +1388,12 @@ class PostgreSQLDB: CREATE INDEX {{vector_index_name}} ON {{k}} USING vchordrq (content_vector vector_cosine_ops) {f'WITH (options = $${self.vchordrq_build_options}$$)' if self.vchordrq_build_options else ''} - """, + """ } embedding_dim = int(os.environ.get("EMBEDDING_DIM", 1024)) for k in vdb_tables: - vector_index_name = ( - f"idx_{k.lower()}_{self.vector_index_type.lower()}_cosine" - ) + vector_index_name = f"idx_{k.lower()}_{self.vector_index_type.lower()}_cosine" check_vector_index_sql = f""" SELECT 1 FROM pg_indexes WHERE indexname = '{vector_index_name}' AND tablename = '{k.lower()}' @@ -1422,14 +1405,8 @@ class PostgreSQLDB: alter_sql = f"ALTER TABLE {k} ALTER COLUMN content_vector TYPE VECTOR({embedding_dim})" await self.execute(alter_sql) logger.debug(f"Ensured vector dimension for {k}") - logger.info( - f"Creating {self.vector_index_type} index {vector_index_name} on table {k}" - ) - await self.execute( - create_sql[self.vector_index_type].format( - vector_index_name=vector_index_name, k=k - ) - ) + logger.info(f"Creating {self.vector_index_type} index {vector_index_name} on table {k}") + await self.execute(create_sql[self.vector_index_type].format(vector_index_name=vector_index_name, k=k)) logger.info( f"Successfully created vector index {vector_index_name} on table {k}" )