fix(metadata): fix broken PostgreSQL queries; performance still needs work
This commit is contained in:
parent
2728bb486e
commit
e04cd3c7d8
1 changed file with 43 additions and 61 deletions
|
|
@ -11,7 +11,12 @@ import configparser
|
||||||
import ssl
|
import ssl
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge, MetadataFilter
|
from lightrag.types import (
|
||||||
|
KnowledgeGraph,
|
||||||
|
KnowledgeGraphNode,
|
||||||
|
KnowledgeGraphEdge,
|
||||||
|
MetadataFilter,
|
||||||
|
)
|
||||||
|
|
||||||
from tenacity import (
|
from tenacity import (
|
||||||
retry,
|
retry,
|
||||||
|
|
@ -935,8 +940,9 @@ class PostgreSQLDB:
|
||||||
try:
|
try:
|
||||||
await self.add_metadata_to_tables()
|
await self.add_metadata_to_tables()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"PostgreSQL, Failed to add metadata columns to existing tables: {e}")
|
logger.error(
|
||||||
|
f"PostgreSQL, Failed to add metadata columns to existing tables: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
# After all tables are created, attempt to migrate timestamp fields
|
# After all tables are created, attempt to migrate timestamp fields
|
||||||
try:
|
try:
|
||||||
|
|
@ -1061,9 +1067,7 @@ class PostgreSQLDB:
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
logger.warning(f"Failed to add metadata column to {table_name}: {e}")
|
||||||
f"Failed to add metadata column to {table_name}: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _migrate_create_full_entities_relations_tables(self):
|
async def _migrate_create_full_entities_relations_tables(self):
|
||||||
"""Create LIGHTRAG_FULL_ENTITIES and LIGHTRAG_FULL_RELATIONS tables if they don't exist"""
|
"""Create LIGHTRAG_FULL_ENTITIES and LIGHTRAG_FULL_RELATIONS tables if they don't exist"""
|
||||||
|
|
@ -2092,7 +2096,9 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
sub_conditions = []
|
sub_conditions = []
|
||||||
for operand in filter_obj.operands:
|
for operand in filter_obj.operands:
|
||||||
if isinstance(operand, dict):
|
if isinstance(operand, dict):
|
||||||
sub_conditions.append("(" + " AND ".join(build_conditions(operand)) + ")")
|
sub_conditions.append(
|
||||||
|
"(" + " AND ".join(build_conditions(operand)) + ")"
|
||||||
|
)
|
||||||
elif isinstance(operand, MetadataFilter):
|
elif isinstance(operand, MetadataFilter):
|
||||||
nested = recurse(operand)
|
nested = recurse(operand)
|
||||||
if nested:
|
if nested:
|
||||||
|
|
@ -2123,7 +2129,11 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
|
|
||||||
#################### query method ###############
|
#################### query method ###############
|
||||||
async def query(
|
async def query(
|
||||||
self, query: str, top_k: int, query_embedding: list[float] = None, metadata_filter: MetadataFilter | None = None
|
self,
|
||||||
|
query: str,
|
||||||
|
top_k: int,
|
||||||
|
query_embedding: list[float] = None,
|
||||||
|
metadata_filter: MetadataFilter | None = None,
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
if query_embedding is not None:
|
if query_embedding is not None:
|
||||||
embedding = query_embedding
|
embedding = query_embedding
|
||||||
|
|
@ -2135,7 +2145,10 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
|
|
||||||
embedding_string = ",".join(map(str, embedding))
|
embedding_string = ",".join(map(str, embedding))
|
||||||
metadata_filter_clause = self.build_metadata_filter_clause(metadata_filter)
|
metadata_filter_clause = self.build_metadata_filter_clause(metadata_filter)
|
||||||
sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string, metadata_filter_clause=metadata_filter_clause)
|
sql = SQL_TEMPLATES[self.namespace].format(
|
||||||
|
embedding_string=embedding_string,
|
||||||
|
metadata_filter_clause=metadata_filter_clause,
|
||||||
|
)
|
||||||
params = {
|
params = {
|
||||||
"workspace": self.workspace,
|
"workspace": self.workspace,
|
||||||
"closer_than_threshold": 1 - self.cosine_better_than_threshold,
|
"closer_than_threshold": 1 - self.cosine_better_than_threshold,
|
||||||
|
|
@ -4844,69 +4857,38 @@ SQL_TEMPLATES = {
|
||||||
update_time = EXCLUDED.update_time
|
update_time = EXCLUDED.update_time
|
||||||
""",
|
""",
|
||||||
"relationships": """
|
"relationships": """
|
||||||
WITH relevant_chunks AS (
|
SELECT r.source_id AS src_id,
|
||||||
SELECT id as chunk_id
|
r.target_id AS tgt_id,
|
||||||
FROM LIGHTRAG_VDB_CHUNKS
|
EXTRACT(EPOCH FROM r.create_time)::BIGINT AS created_at
|
||||||
WHERE ($4::varchar[] IS NULL OR full_doc_id = ANY ($4::varchar[]))
|
FROM LIGHTRAG_VDB_RELATION r
|
||||||
{metadata_filter_clause}
|
JOIN LIGHTRAG_VDB_CHUNKS c ON r.chunk_ids && ARRAY[c.id]
|
||||||
),
|
WHERE r.workspace = $1
|
||||||
rc AS (
|
AND r.content_vector <=> '[{embedding_string}]'::vector < $2
|
||||||
SELECT array_agg(chunk_id) AS chunk_arr
|
{metadata_filter_clause}
|
||||||
FROM relevant_chunks
|
ORDER BY r.content_vector <=> '[{embedding_string}]'::vector
|
||||||
)
|
LIMIT $3;
|
||||||
SELECT r.source_id AS src_id,
|
""",
|
||||||
r.target_id AS tgt_id,
|
|
||||||
EXTRACT(EPOCH FROM r.create_time)::BIGINT AS created_at
|
|
||||||
FROM LIGHTRAG_VDB_RELATION r
|
|
||||||
JOIN rc ON TRUE
|
|
||||||
WHERE r.workspace = $1
|
|
||||||
AND r.content_vector <=> '[{embedding_string}]'::vector < $2
|
|
||||||
AND r.chunk_ids && (rc.chunk_arr::varchar[])
|
|
||||||
ORDER BY r.content_vector <=> '[{embedding_string}]'::vector
|
|
||||||
LIMIT $3;
|
|
||||||
""",
|
|
||||||
"entities": """
|
"entities": """
|
||||||
WITH relevant_chunks AS (
|
|
||||||
SELECT id as chunk_id
|
|
||||||
FROM LIGHTRAG_VDB_CHUNKS
|
|
||||||
WHERE ($4::varchar[] IS NULL OR full_doc_id = ANY ($4::varchar[]))
|
|
||||||
{metadata_filter_clause}
|
|
||||||
),
|
|
||||||
rc AS (
|
|
||||||
SELECT array_agg(chunk_id) AS chunk_arr
|
|
||||||
FROM relevant_chunks
|
|
||||||
)
|
|
||||||
SELECT e.entity_name,
|
SELECT e.entity_name,
|
||||||
EXTRACT(EPOCH FROM e.create_time)::BIGINT AS created_at
|
EXTRACT(EPOCH FROM e.create_time)::BIGINT AS created_at
|
||||||
FROM LIGHTRAG_VDB_ENTITY e
|
FROM LIGHTRAG_VDB_ENTITY e
|
||||||
JOIN rc ON TRUE
|
JOIN LIGHTRAG_VDB_CHUNKS c ON e.chunk_ids && ARRAY[c.id]
|
||||||
WHERE e.workspace = $1
|
WHERE e.workspace = $1
|
||||||
AND e.content_vector <=> '[{embedding_string}]'::vector < $2
|
AND e.content_vector <=> '[{embedding_string}]'::vector < $2
|
||||||
AND e.chunk_ids && (rc.chunk_arr::varchar[])
|
{metadata_filter_clause}
|
||||||
ORDER BY e.content_vector <=> '[{embedding_string}]'::vector
|
ORDER BY e.content_vector <=> '[{embedding_string}]'::vector
|
||||||
LIMIT $3;
|
LIMIT $3;
|
||||||
""",
|
""",
|
||||||
"chunks": """
|
"chunks": """
|
||||||
WITH relevant_chunks AS (
|
|
||||||
SELECT id as chunk_id
|
|
||||||
FROM LIGHTRAG_VDB_CHUNKS
|
|
||||||
WHERE ($4::varchar[] IS NULL OR full_doc_id = ANY ($4::varchar[]))
|
|
||||||
{metadata_filter_clause}
|
|
||||||
),
|
|
||||||
rc AS (
|
|
||||||
SELECT array_agg(chunk_id) AS chunk_arr
|
|
||||||
FROM relevant_chunks
|
|
||||||
)
|
|
||||||
SELECT c.id,
|
SELECT c.id,
|
||||||
c.content,
|
c.content,
|
||||||
c.file_path,
|
c.file_path,
|
||||||
EXTRACT(EPOCH FROM c.create_time)::BIGINT AS created_at,
|
EXTRACT(EPOCH FROM c.create_time)::BIGINT AS created_at,
|
||||||
c.metadata
|
c.metadata
|
||||||
FROM LIGHTRAG_VDB_CHUNKS c
|
FROM LIGHTRAG_VDB_CHUNKS c
|
||||||
JOIN rc ON TRUE
|
|
||||||
WHERE c.workspace = $1
|
WHERE c.workspace = $1
|
||||||
AND c.content_vector <=> '[{embedding_string}]'::vector < $2
|
AND c.content_vector <=> '[{embedding_string}]'::vector < $2
|
||||||
AND c.id = ANY (rc.chunk_arr)
|
{metadata_filter_clause}
|
||||||
ORDER BY c.content_vector <=> '[{embedding_string}]'::vector
|
ORDER BY c.content_vector <=> '[{embedding_string}]'::vector
|
||||||
LIMIT $3;
|
LIMIT $3;
|
||||||
""",
|
""",
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue