feat(metadata) WIP: add GIN indexes on the JSONB metadata columns and restructure the SQL queries to speed up metadata filtering
This commit is contained in:
parent
e38387964f
commit
b5cc842708
1 changed files with 90 additions and 24 deletions
|
|
@ -936,6 +936,12 @@ class PostgreSQLDB:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"PostgreSQL, Failed to create vector index, type: {self.vector_index_type}, Got: {e}"
|
f"PostgreSQL, Failed to create vector index, type: {self.vector_index_type}, Got: {e}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Create GIN indexes for JSONB metadata columns
|
||||||
|
try:
|
||||||
|
await self._create_gin_metadata_indexes()
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"PostgreSQL, Failed to create GIN metadata indexes: {e}")
|
||||||
# Compatibility check - add metadata columns to LIGHTRAG_DOC_CHUNKS and LIGHTRAG_VDB_CHUNKS
|
# Compatibility check - add metadata columns to LIGHTRAG_DOC_CHUNKS and LIGHTRAG_VDB_CHUNKS
|
||||||
try:
|
try:
|
||||||
await self.add_metadata_to_tables()
|
await self.add_metadata_to_tables()
|
||||||
|
|
@ -1272,6 +1278,37 @@ class PostgreSQLDB:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to create ivfflat index on {k}: {e}")
|
logger.error(f"Failed to create ivfflat index on {k}: {e}")
|
||||||
|
|
||||||
|
async def _create_gin_metadata_indexes(self):
    """Create GIN indexes for JSONB metadata columns to speed up metadata filtering.

    For each of LIGHTRAG_DOC_CHUNKS, LIGHTRAG_VDB_CHUNKS and
    LIGHTRAG_DOC_STATUS an index named ``idx_<table>_metadata_gin`` is built
    with ``CREATE INDEX CONCURRENTLY ... USING gin (metadata jsonb_path_ops)``.

    A concurrent build that fails part-way leaves the index behind in an
    INVALID state. Such an index is still listed in ``pg_indexes`` and still
    satisfies ``IF NOT EXISTS``, but the planner never uses it, so it is
    dropped and rebuilt here instead of being reported as already present.

    Failures are logged per table and never raised, so a problem with one
    table does not prevent the remaining tables from being processed.
    """
    metadata_tables = (
        "LIGHTRAG_DOC_CHUNKS",
        "LIGHTRAG_VDB_CHUNKS",
        "LIGHTRAG_DOC_STATUS",
    )

    for table in metadata_tables:
        index_name = f"idx_{table.lower()}_metadata_gin"

        # Table and index names come from the hard-coded tuple above, so
        # interpolating them into SQL text is safe.
        check_index_sql = f"""
            SELECT 1 FROM pg_indexes
            WHERE indexname = '{index_name}'
            AND tablename = '{table.lower()}'
        """
        # Yields a row only when the index exists but is marked invalid;
        # to_regclass() returns NULL for an unknown name, which matches nothing.
        invalid_index_sql = f"""
            SELECT 1 FROM pg_index
            WHERE indexrelid = to_regclass('{index_name}')
            AND NOT indisvalid
        """
        # jsonb_path_ops gives a smaller index than the default jsonb_ops
        # operator class, but it only serves containment (@>) and jsonpath
        # (@?, @@) operators.
        create_index_sql = f"""
            CREATE INDEX CONCURRENTLY IF NOT EXISTS {index_name}
            ON {table} USING gin (metadata jsonb_path_ops)
        """

        try:
            if await self.query(check_index_sql):
                if not await self.query(invalid_index_sql):
                    logger.info(f"PostgreSQL, GIN index {index_name} already exists on table {table}")
                    continue

                # Leftover from an interrupted concurrent build: remove it
                # so that the CREATE below is not skipped by IF NOT EXISTS.
                logger.warning(
                    f"PostgreSQL, GIN index {index_name} on table {table} is invalid, rebuilding"
                )
                await self.execute(f"DROP INDEX CONCURRENTLY IF EXISTS {index_name}")

            logger.info(f"PostgreSQL, Creating GIN index {index_name} on table {table}")
            await self.execute(create_index_sql)
            logger.info(f"PostgreSQL, Successfully created GIN index {index_name} on table {table}")
        except Exception as e:
            # Best-effort: index creation is an optimisation, so log and move on.
            logger.error(f"PostgreSQL, Failed to create GIN index on table {table}, Got: {e}")
|
||||||
|
|
||||||
async def query(
|
async def query(
|
||||||
self,
|
self,
|
||||||
sql: str,
|
sql: str,
|
||||||
|
|
@ -4866,39 +4903,68 @@ SQL_TEMPLATES = {
|
||||||
update_time = EXCLUDED.update_time
|
update_time = EXCLUDED.update_time
|
||||||
""",
|
""",
|
||||||
"relationships": """
|
"relationships": """
|
||||||
SELECT r.source_id AS src_id,
|
WITH top_relationships AS (
|
||||||
r.target_id AS tgt_id,
|
SELECT
|
||||||
EXTRACT(EPOCH FROM r.create_time)::BIGINT AS created_at
|
r.id,
|
||||||
FROM LIGHTRAG_VDB_RELATION r
|
r.source_id,
|
||||||
JOIN LIGHTRAG_VDB_CHUNKS c ON r.chunk_ids && ARRAY[c.id]
|
r.target_id,
|
||||||
WHERE r.workspace = $1
|
r.create_time,
|
||||||
AND r.content_vector <=> '[{embedding_string}]'::vector < $2
|
r.metadata,
|
||||||
{metadata_filter_clause}
|
r.chunk_ids
|
||||||
ORDER BY r.content_vector <=> '[{embedding_string}]'::vector
|
FROM
|
||||||
LIMIT $3;
|
LIGHTRAG_VDB_RELATION r
|
||||||
|
WHERE
|
||||||
|
r.workspace = $1
|
||||||
|
-- Apply the metadata filter here to reduce the set of relationships searched
|
||||||
|
{metadata_filter_clause}
|
||||||
|
ORDER BY
|
||||||
|
r.content_vector <=> '[{embedding_string}]'::vector
|
||||||
|
LIMIT $3
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
tr.source_id AS src_id,
|
||||||
|
tr.target_id AS tgt_id,
|
||||||
|
EXTRACT(EPOCH FROM tr.create_time)::BIGINT AS created_at
|
||||||
|
FROM
|
||||||
|
top_relationships tr
|
||||||
|
JOIN LIGHTRAG_VDB_CHUNKS c ON tr.chunk_ids && ARRAY[c.id];
|
||||||
""",
|
""",
|
||||||
"entities": """
|
"entities": """
|
||||||
SELECT e.entity_name,
|
WITH top_entities AS (
|
||||||
EXTRACT(EPOCH FROM e.create_time)::BIGINT AS created_at
|
SELECT
|
||||||
FROM LIGHTRAG_VDB_ENTITY e
|
e.id,
|
||||||
JOIN LIGHTRAG_VDB_CHUNKS c ON e.chunk_ids && ARRAY[c.id]
|
e.entity_name,
|
||||||
WHERE e.workspace = $1
|
e.create_time,
|
||||||
AND e.content_vector <=> '[{embedding_string}]'::vector < $2
|
e.metadata,
|
||||||
{metadata_filter_clause}
|
e.chunk_ids
|
||||||
ORDER BY e.content_vector <=> '[{embedding_string}]'::vector
|
FROM LIGHTRAG_VDB_ENTITY e
|
||||||
LIMIT $3;
|
WHERE
|
||||||
|
e.workspace = $1
|
||||||
|
{metadata_filter_clause}
|
||||||
|
ORDER BY
|
||||||
|
e.content_vector <=> '[{embedding_string}]'::vector
|
||||||
|
LIMIT $3)
|
||||||
|
SELECT
|
||||||
|
te.entity_name,
|
||||||
|
EXTRACT(EPOCH FROM te.create_time)::BIGINT AS created_at
|
||||||
|
FROM
|
||||||
|
top_entities te
|
||||||
|
JOIN LIGHTRAG_VDB_CHUNKS c ON te.chunk_ids && ARRAY[c.id];
|
||||||
""",
|
""",
|
||||||
"chunks": """
|
"chunks": """
|
||||||
SELECT c.id,
|
SELECT c.id,
|
||||||
c.content,
|
c.content,
|
||||||
c.file_path,
|
c.file_path,
|
||||||
EXTRACT(EPOCH FROM c.create_time)::BIGINT AS created_at,
|
EXTRACT(EPOCH FROM c.create_time)::BIGINT AS created_at,
|
||||||
c.metadata
|
c.metadata,
|
||||||
|
c.content_vector <=> '[{embedding_string}]'::vector AS distance
|
||||||
|
|
||||||
FROM LIGHTRAG_VDB_CHUNKS c
|
FROM LIGHTRAG_VDB_CHUNKS c
|
||||||
WHERE c.workspace = $1
|
WHERE
|
||||||
AND c.content_vector <=> '[{embedding_string}]'::vector < $2
|
c.workspace = $1
|
||||||
{metadata_filter_clause}
|
{metadata_filter_clause}
|
||||||
ORDER BY c.content_vector <=> '[{embedding_string}]'::vector
|
ORDER BY
|
||||||
|
c.content_vector <=> '[{embedding_string}]'::vector
|
||||||
LIMIT $3;
|
LIMIT $3;
|
||||||
""",
|
""",
|
||||||
# DROP tables
|
# DROP tables
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue