feat(postgres_impl): add vchordrq vector index support and unify vector index creation logic

(cherry picked from commit d07023c962)
This commit is contained in:
wmsnp 2025-11-18 11:45:16 +08:00 committed by Raphaël MANSUY
parent 1cbe0ba885
commit 3954bb6579
3 changed files with 16 additions and 33 deletions

View file

@ -28,10 +28,13 @@ password = your_password
database = your_database database = your_database
# workspace = default # workspace = default
max_connections = 12 max_connections = 12
vector_index_type = HNSW # HNSW or IVFFLAT vector_index_type = HNSW # HNSW, IVFFLAT or VCHORDRQ
hnsw_m = 16 hnsw_m = 16
hnsw_ef = 64 hnsw_ef = 64
ivfflat_lists = 100 ivfflat_lists = 100
vchordrq_build_options =
vchordrq_probes =
vchordrq_epsilon = 1.9
[memgraph] [memgraph]
uri = bolt://localhost:7687 uri = bolt://localhost:7687

View file

@ -305,11 +305,14 @@ POSTGRES_MAX_CONNECTIONS=12
# POSTGRES_WORKSPACE=forced_workspace_name # POSTGRES_WORKSPACE=forced_workspace_name
### PostgreSQL Vector Storage Configuration ### PostgreSQL Vector Storage Configuration
### Vector storage type: HNSW, IVFFlat ### Vector storage type: HNSW, IVFFlat, VCHORDRQ
POSTGRES_VECTOR_INDEX_TYPE=HNSW POSTGRES_VECTOR_INDEX_TYPE=HNSW
POSTGRES_HNSW_M=16 POSTGRES_HNSW_M=16
POSTGRES_HNSW_EF=200 POSTGRES_HNSW_EF=200
POSTGRES_IVFFLAT_LISTS=100 POSTGRES_IVFFLAT_LISTS=100
POSTGRES_VCHORDRQ_BUILD_OPTIONS=
POSTGRES_VCHORDRQ_PROBES=
POSTGRES_VCHORDRQ_EPSILON=1.9
### PostgreSQL Connection Retry Configuration (Network Robustness) ### PostgreSQL Connection Retry Configuration (Network Robustness)
### Number of retry attempts (1-10, default: 3) ### Number of retry attempts (1-10, default: 3)

View file

@ -413,27 +413,12 @@ class PostgreSQLDB:
pass pass
async def configure_vchordrq(self, connection: asyncpg.Connection) -> None: async def configure_vchordrq(self, connection: asyncpg.Connection) -> None:
"""Configure VCHORDRQ extension for vector similarity search. """Configure VCHORDRQ extension for vector similarity search."""
try:
Raises:
asyncpg.exceptions.UndefinedObjectError: If VCHORDRQ extension is not installed
asyncpg.exceptions.InvalidParameterValueError: If parameter value is invalid
Note:
This method does not catch exceptions. Configuration errors will fail-fast,
while transient connection errors will be retried by _run_with_retry.
"""
# Handle probes parameter - only set if non-empty value is provided
if self.vchordrq_probes and str(self.vchordrq_probes).strip():
await connection.execute(f"SET vchordrq.probes TO '{self.vchordrq_probes}'") await connection.execute(f"SET vchordrq.probes TO '{self.vchordrq_probes}'")
logger.debug(f"PostgreSQL, VCHORDRQ probes set to: {self.vchordrq_probes}")
# Handle epsilon parameter independently - check for None to allow 0.0 as valid value
if self.vchordrq_epsilon is not None:
await connection.execute(f"SET vchordrq.epsilon TO {self.vchordrq_epsilon}") await connection.execute(f"SET vchordrq.epsilon TO {self.vchordrq_epsilon}")
logger.debug( except Exception:
f"PostgreSQL, VCHORDRQ epsilon set to: {self.vchordrq_epsilon}" pass
)
async def _migrate_llm_cache_schema(self): async def _migrate_llm_cache_schema(self):
"""Migrate LLM cache schema: add new columns and remove deprecated mode field""" """Migrate LLM cache schema: add new columns and remove deprecated mode field"""
@ -1403,14 +1388,12 @@ class PostgreSQLDB:
CREATE INDEX {{vector_index_name}} CREATE INDEX {{vector_index_name}}
ON {{k}} USING vchordrq (content_vector vector_cosine_ops) ON {{k}} USING vchordrq (content_vector vector_cosine_ops)
{f'WITH (options = $${self.vchordrq_build_options}$$)' if self.vchordrq_build_options else ''} {f'WITH (options = $${self.vchordrq_build_options}$$)' if self.vchordrq_build_options else ''}
""", """
} }
embedding_dim = int(os.environ.get("EMBEDDING_DIM", 1024)) embedding_dim = int(os.environ.get("EMBEDDING_DIM", 1024))
for k in vdb_tables: for k in vdb_tables:
vector_index_name = ( vector_index_name = f"idx_{k.lower()}_{self.vector_index_type.lower()}_cosine"
f"idx_{k.lower()}_{self.vector_index_type.lower()}_cosine"
)
check_vector_index_sql = f""" check_vector_index_sql = f"""
SELECT 1 FROM pg_indexes SELECT 1 FROM pg_indexes
WHERE indexname = '{vector_index_name}' AND tablename = '{k.lower()}' WHERE indexname = '{vector_index_name}' AND tablename = '{k.lower()}'
@ -1422,14 +1405,8 @@ class PostgreSQLDB:
alter_sql = f"ALTER TABLE {k} ALTER COLUMN content_vector TYPE VECTOR({embedding_dim})" alter_sql = f"ALTER TABLE {k} ALTER COLUMN content_vector TYPE VECTOR({embedding_dim})"
await self.execute(alter_sql) await self.execute(alter_sql)
logger.debug(f"Ensured vector dimension for {k}") logger.debug(f"Ensured vector dimension for {k}")
logger.info( logger.info(f"Creating {self.vector_index_type} index {vector_index_name} on table {k}")
f"Creating {self.vector_index_type} index {vector_index_name} on table {k}" await self.execute(create_sql[self.vector_index_type].format(vector_index_name=vector_index_name, k=k))
)
await self.execute(
create_sql[self.vector_index_type].format(
vector_index_name=vector_index_name, k=k
)
)
logger.info( logger.info(
f"Successfully created vector index {vector_index_name} on table {k}" f"Successfully created vector index {vector_index_name} on table {k}"
) )