Fix: Resolve workspace isolation problem for PostgreSQL with multiple LightRAG instances
This commit is contained in:
parent
d9c1f935f5
commit
ca00b9c8ee
1 changed files with 206 additions and 178 deletions
|
|
@ -30,6 +30,7 @@ from ..base import (
|
||||||
from ..namespace import NameSpace, is_namespace
|
from ..namespace import NameSpace, is_namespace
|
||||||
from ..utils import logger
|
from ..utils import logger
|
||||||
from ..constants import GRAPH_FIELD_SEP
|
from ..constants import GRAPH_FIELD_SEP
|
||||||
|
from ..kg.shared_storage import get_graph_db_lock
|
||||||
|
|
||||||
import pipmaster as pm
|
import pipmaster as pm
|
||||||
|
|
||||||
|
|
@ -1220,9 +1221,6 @@ class PostgreSQLDB:
|
||||||
with_age: bool = False,
|
with_age: bool = False,
|
||||||
graph_name: str | None = None,
|
graph_name: str | None = None,
|
||||||
) -> dict[str, Any] | None | list[dict[str, Any]]:
|
) -> dict[str, Any] | None | list[dict[str, Any]]:
|
||||||
# start_time = time.time()
|
|
||||||
# logger.info(f"PostgreSQL, Querying:\n{sql}")
|
|
||||||
|
|
||||||
async with self.pool.acquire() as connection: # type: ignore
|
async with self.pool.acquire() as connection: # type: ignore
|
||||||
if with_age and graph_name:
|
if with_age and graph_name:
|
||||||
await self.configure_age(connection, graph_name) # type: ignore
|
await self.configure_age(connection, graph_name) # type: ignore
|
||||||
|
|
@ -1248,10 +1246,6 @@ class PostgreSQLDB:
|
||||||
else:
|
else:
|
||||||
data = None
|
data = None
|
||||||
|
|
||||||
# query_time = time.time() - start_time
|
|
||||||
# logger.info(f"PostgreSQL, Query result len: {len(data)}")
|
|
||||||
# logger.info(f"PostgreSQL, Query execution time: {query_time:.4f}s")
|
|
||||||
|
|
||||||
return data
|
return data
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"PostgreSQL database, error:{e}")
|
logger.error(f"PostgreSQL database, error:{e}")
|
||||||
|
|
@ -1414,18 +1408,16 @@ class PGKVStorage(BaseKVStorage):
|
||||||
async def initialize(self):
|
async def initialize(self):
|
||||||
if self.db is None:
|
if self.db is None:
|
||||||
self.db = await ClientManager.get_client()
|
self.db = await ClientManager.get_client()
|
||||||
# Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
|
# Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
|
||||||
if self.db.workspace:
|
if self.db.workspace:
|
||||||
# Use PostgreSQLDB's workspace (highest priority)
|
# Use PostgreSQLDB's workspace (highest priority)
|
||||||
final_workspace = self.db.workspace
|
self.workspace = self.db.workspace
|
||||||
elif hasattr(self, "workspace") and self.workspace:
|
elif hasattr(self, "workspace") and self.workspace:
|
||||||
# Use storage class's workspace (medium priority)
|
# Use storage class's workspace (medium priority)
|
||||||
final_workspace = self.workspace
|
pass
|
||||||
self.db.workspace = final_workspace
|
else:
|
||||||
else:
|
# Use "default" for compatibility (lowest priority)
|
||||||
# Use "default" for compatibility (lowest priority)
|
self.workspace = "default"
|
||||||
final_workspace = "default"
|
|
||||||
self.db.workspace = final_workspace
|
|
||||||
|
|
||||||
async def finalize(self):
|
async def finalize(self):
|
||||||
if self.db is not None:
|
if self.db is not None:
|
||||||
|
|
@ -1441,11 +1433,13 @@ class PGKVStorage(BaseKVStorage):
|
||||||
"""
|
"""
|
||||||
table_name = namespace_to_table_name(self.namespace)
|
table_name = namespace_to_table_name(self.namespace)
|
||||||
if not table_name:
|
if not table_name:
|
||||||
logger.error(f"Unknown namespace for get_all: {self.namespace}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Unknown namespace for get_all: {self.namespace}"
|
||||||
|
)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
sql = f"SELECT * FROM {table_name} WHERE workspace=$1"
|
sql = f"SELECT * FROM {table_name} WHERE workspace=$1"
|
||||||
params = {"workspace": self.db.workspace}
|
params = {"workspace": self.workspace}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
results = await self.db.query(sql, params, multirows=True)
|
results = await self.db.query(sql, params, multirows=True)
|
||||||
|
|
@ -1533,13 +1527,15 @@ class PGKVStorage(BaseKVStorage):
|
||||||
# For other namespaces, return as-is
|
# For other namespaces, return as-is
|
||||||
return {row["id"]: row for row in results}
|
return {row["id"]: row for row in results}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error retrieving all data from {self.namespace}: {e}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Error retrieving all data from {self.namespace}: {e}"
|
||||||
|
)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
||||||
"""Get data by id."""
|
"""Get data by id."""
|
||||||
sql = SQL_TEMPLATES["get_by_id_" + self.namespace]
|
sql = SQL_TEMPLATES["get_by_id_" + self.namespace]
|
||||||
params = {"workspace": self.db.workspace, "id": id}
|
params = {"workspace": self.workspace, "id": id}
|
||||||
response = await self.db.query(sql, params)
|
response = await self.db.query(sql, params)
|
||||||
|
|
||||||
if response and is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
|
if response and is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
|
||||||
|
|
@ -1619,7 +1615,7 @@ class PGKVStorage(BaseKVStorage):
|
||||||
sql = SQL_TEMPLATES["get_by_ids_" + self.namespace].format(
|
sql = SQL_TEMPLATES["get_by_ids_" + self.namespace].format(
|
||||||
ids=",".join([f"'{id}'" for id in ids])
|
ids=",".join([f"'{id}'" for id in ids])
|
||||||
)
|
)
|
||||||
params = {"workspace": self.db.workspace}
|
params = {"workspace": self.workspace}
|
||||||
results = await self.db.query(sql, params, multirows=True)
|
results = await self.db.query(sql, params, multirows=True)
|
||||||
|
|
||||||
if results and is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
|
if results and is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
|
||||||
|
|
@ -1706,7 +1702,7 @@ class PGKVStorage(BaseKVStorage):
|
||||||
table_name=namespace_to_table_name(self.namespace),
|
table_name=namespace_to_table_name(self.namespace),
|
||||||
ids=",".join([f"'{id}'" for id in keys]),
|
ids=",".join([f"'{id}'" for id in keys]),
|
||||||
)
|
)
|
||||||
params = {"workspace": self.db.workspace}
|
params = {"workspace": self.workspace}
|
||||||
try:
|
try:
|
||||||
res = await self.db.query(sql, params, multirows=True)
|
res = await self.db.query(sql, params, multirows=True)
|
||||||
if res:
|
if res:
|
||||||
|
|
@ -1717,13 +1713,13 @@ class PGKVStorage(BaseKVStorage):
|
||||||
return new_keys
|
return new_keys
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"PostgreSQL database,\nsql:{sql},\nparams:{params},\nerror:{e}"
|
f"[{self.workspace}] PostgreSQL database,\nsql:{sql},\nparams:{params},\nerror:{e}"
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
################ INSERT METHODS ################
|
################ INSERT METHODS ################
|
||||||
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
||||||
logger.debug(f"Inserting {len(data)} to {self.namespace}")
|
logger.debug(f"[{self.workspace}] Inserting {len(data)} to {self.namespace}")
|
||||||
if not data:
|
if not data:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -1733,7 +1729,7 @@ class PGKVStorage(BaseKVStorage):
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
upsert_sql = SQL_TEMPLATES["upsert_text_chunk"]
|
upsert_sql = SQL_TEMPLATES["upsert_text_chunk"]
|
||||||
_data = {
|
_data = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"id": k,
|
"id": k,
|
||||||
"tokens": v["tokens"],
|
"tokens": v["tokens"],
|
||||||
"chunk_order_index": v["chunk_order_index"],
|
"chunk_order_index": v["chunk_order_index"],
|
||||||
|
|
@ -1751,14 +1747,14 @@ class PGKVStorage(BaseKVStorage):
|
||||||
_data = {
|
_data = {
|
||||||
"id": k,
|
"id": k,
|
||||||
"content": v["content"],
|
"content": v["content"],
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
}
|
}
|
||||||
await self.db.execute(upsert_sql, _data)
|
await self.db.execute(upsert_sql, _data)
|
||||||
elif is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
|
elif is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
upsert_sql = SQL_TEMPLATES["upsert_llm_response_cache"]
|
upsert_sql = SQL_TEMPLATES["upsert_llm_response_cache"]
|
||||||
_data = {
|
_data = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"id": k, # Use flattened key as id
|
"id": k, # Use flattened key as id
|
||||||
"original_prompt": v["original_prompt"],
|
"original_prompt": v["original_prompt"],
|
||||||
"return_value": v["return"],
|
"return_value": v["return"],
|
||||||
|
|
@ -1778,7 +1774,7 @@ class PGKVStorage(BaseKVStorage):
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
upsert_sql = SQL_TEMPLATES["upsert_full_entities"]
|
upsert_sql = SQL_TEMPLATES["upsert_full_entities"]
|
||||||
_data = {
|
_data = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"id": k,
|
"id": k,
|
||||||
"entity_names": json.dumps(v["entity_names"]),
|
"entity_names": json.dumps(v["entity_names"]),
|
||||||
"count": v["count"],
|
"count": v["count"],
|
||||||
|
|
@ -1792,7 +1788,7 @@ class PGKVStorage(BaseKVStorage):
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
upsert_sql = SQL_TEMPLATES["upsert_full_relations"]
|
upsert_sql = SQL_TEMPLATES["upsert_full_relations"]
|
||||||
_data = {
|
_data = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"id": k,
|
"id": k,
|
||||||
"relation_pairs": json.dumps(v["relation_pairs"]),
|
"relation_pairs": json.dumps(v["relation_pairs"]),
|
||||||
"count": v["count"],
|
"count": v["count"],
|
||||||
|
|
@ -1819,20 +1815,22 @@ class PGKVStorage(BaseKVStorage):
|
||||||
|
|
||||||
table_name = namespace_to_table_name(self.namespace)
|
table_name = namespace_to_table_name(self.namespace)
|
||||||
if not table_name:
|
if not table_name:
|
||||||
logger.error(f"Unknown namespace for deletion: {self.namespace}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Unknown namespace for deletion: {self.namespace}"
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id = ANY($2)"
|
delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id = ANY($2)"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.db.execute(
|
await self.db.execute(delete_sql, {"workspace": self.workspace, "ids": ids})
|
||||||
delete_sql, {"workspace": self.db.workspace, "ids": ids}
|
|
||||||
)
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Successfully deleted {len(ids)} records from {self.namespace}"
|
f"[{self.workspace}] Successfully deleted {len(ids)} records from {self.namespace}"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while deleting records from {self.namespace}: {e}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Error while deleting records from {self.namespace}: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
async def drop(self) -> dict[str, str]:
|
async def drop(self) -> dict[str, str]:
|
||||||
"""Drop the storage"""
|
"""Drop the storage"""
|
||||||
|
|
@ -1847,7 +1845,7 @@ class PGKVStorage(BaseKVStorage):
|
||||||
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
|
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
|
||||||
table_name=table_name
|
table_name=table_name
|
||||||
)
|
)
|
||||||
await self.db.execute(drop_sql, {"workspace": self.db.workspace})
|
await self.db.execute(drop_sql, {"workspace": self.workspace})
|
||||||
return {"status": "success", "message": "data dropped"}
|
return {"status": "success", "message": "data dropped"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
@ -1871,18 +1869,16 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
async def initialize(self):
|
async def initialize(self):
|
||||||
if self.db is None:
|
if self.db is None:
|
||||||
self.db = await ClientManager.get_client()
|
self.db = await ClientManager.get_client()
|
||||||
# Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
|
# Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
|
||||||
if self.db.workspace:
|
if self.db.workspace:
|
||||||
# Use PostgreSQLDB's workspace (highest priority)
|
# Use PostgreSQLDB's workspace (highest priority)
|
||||||
final_workspace = self.db.workspace
|
self.workspace = self.db.workspace
|
||||||
elif hasattr(self, "workspace") and self.workspace:
|
elif hasattr(self, "workspace") and self.workspace:
|
||||||
# Use storage class's workspace (medium priority)
|
# Use storage class's workspace (medium priority)
|
||||||
final_workspace = self.workspace
|
pass
|
||||||
self.db.workspace = final_workspace
|
else:
|
||||||
else:
|
# Use "default" for compatibility (lowest priority)
|
||||||
# Use "default" for compatibility (lowest priority)
|
self.workspace = "default"
|
||||||
final_workspace = "default"
|
|
||||||
self.db.workspace = final_workspace
|
|
||||||
|
|
||||||
async def finalize(self):
|
async def finalize(self):
|
||||||
if self.db is not None:
|
if self.db is not None:
|
||||||
|
|
@ -1895,7 +1891,7 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
try:
|
try:
|
||||||
upsert_sql = SQL_TEMPLATES["upsert_chunk"]
|
upsert_sql = SQL_TEMPLATES["upsert_chunk"]
|
||||||
data: dict[str, Any] = {
|
data: dict[str, Any] = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"id": item["__id__"],
|
"id": item["__id__"],
|
||||||
"tokens": item["tokens"],
|
"tokens": item["tokens"],
|
||||||
"chunk_order_index": item["chunk_order_index"],
|
"chunk_order_index": item["chunk_order_index"],
|
||||||
|
|
@ -1907,7 +1903,9 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
"update_time": current_time,
|
"update_time": current_time,
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error to prepare upsert,\nsql: {e}\nitem: {item}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Error to prepare upsert,\nsql: {e}\nitem: {item}"
|
||||||
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return upsert_sql, data
|
return upsert_sql, data
|
||||||
|
|
@ -1923,7 +1921,7 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
chunk_ids = [source_id]
|
chunk_ids = [source_id]
|
||||||
|
|
||||||
data: dict[str, Any] = {
|
data: dict[str, Any] = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"id": item["__id__"],
|
"id": item["__id__"],
|
||||||
"entity_name": item["entity_name"],
|
"entity_name": item["entity_name"],
|
||||||
"content": item["content"],
|
"content": item["content"],
|
||||||
|
|
@ -1946,7 +1944,7 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
chunk_ids = [source_id]
|
chunk_ids = [source_id]
|
||||||
|
|
||||||
data: dict[str, Any] = {
|
data: dict[str, Any] = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"id": item["__id__"],
|
"id": item["__id__"],
|
||||||
"source_id": item["src_id"],
|
"source_id": item["src_id"],
|
||||||
"target_id": item["tgt_id"],
|
"target_id": item["tgt_id"],
|
||||||
|
|
@ -1960,7 +1958,7 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
return upsert_sql, data
|
return upsert_sql, data
|
||||||
|
|
||||||
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
||||||
logger.debug(f"Inserting {len(data)} to {self.namespace}")
|
logger.debug(f"[{self.workspace}] Inserting {len(data)} to {self.namespace}")
|
||||||
if not data:
|
if not data:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -2009,7 +2007,7 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
# Use parameterized document IDs (None means search across all documents)
|
# Use parameterized document IDs (None means search across all documents)
|
||||||
sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string)
|
sql = SQL_TEMPLATES[self.namespace].format(embedding_string=embedding_string)
|
||||||
params = {
|
params = {
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"doc_ids": ids,
|
"doc_ids": ids,
|
||||||
"closer_than_threshold": 1 - self.cosine_better_than_threshold,
|
"closer_than_threshold": 1 - self.cosine_better_than_threshold,
|
||||||
"top_k": top_k,
|
"top_k": top_k,
|
||||||
|
|
@ -2032,20 +2030,22 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
|
|
||||||
table_name = namespace_to_table_name(self.namespace)
|
table_name = namespace_to_table_name(self.namespace)
|
||||||
if not table_name:
|
if not table_name:
|
||||||
logger.error(f"Unknown namespace for vector deletion: {self.namespace}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Unknown namespace for vector deletion: {self.namespace}"
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id = ANY($2)"
|
delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id = ANY($2)"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.db.execute(
|
await self.db.execute(delete_sql, {"workspace": self.workspace, "ids": ids})
|
||||||
delete_sql, {"workspace": self.db.workspace, "ids": ids}
|
|
||||||
)
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Successfully deleted {len(ids)} vectors from {self.namespace}"
|
f"[{self.workspace}] Successfully deleted {len(ids)} vectors from {self.namespace}"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Error while deleting vectors from {self.namespace}: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
async def delete_entity(self, entity_name: str) -> None:
|
async def delete_entity(self, entity_name: str) -> None:
|
||||||
"""Delete an entity by its name from the vector storage.
|
"""Delete an entity by its name from the vector storage.
|
||||||
|
|
@ -2059,11 +2059,13 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
WHERE workspace=$1 AND entity_name=$2"""
|
WHERE workspace=$1 AND entity_name=$2"""
|
||||||
|
|
||||||
await self.db.execute(
|
await self.db.execute(
|
||||||
delete_sql, {"workspace": self.db.workspace, "entity_name": entity_name}
|
delete_sql, {"workspace": self.workspace, "entity_name": entity_name}
|
||||||
|
)
|
||||||
|
logger.debug(
|
||||||
|
f"[{self.workspace}] Successfully deleted entity {entity_name}"
|
||||||
)
|
)
|
||||||
logger.debug(f"Successfully deleted entity {entity_name}")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error deleting entity {entity_name}: {e}")
|
logger.error(f"[{self.workspace}] Error deleting entity {entity_name}: {e}")
|
||||||
|
|
||||||
async def delete_entity_relation(self, entity_name: str) -> None:
|
async def delete_entity_relation(self, entity_name: str) -> None:
|
||||||
"""Delete all relations associated with an entity.
|
"""Delete all relations associated with an entity.
|
||||||
|
|
@ -2077,11 +2079,15 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
WHERE workspace=$1 AND (source_id=$2 OR target_id=$2)"""
|
WHERE workspace=$1 AND (source_id=$2 OR target_id=$2)"""
|
||||||
|
|
||||||
await self.db.execute(
|
await self.db.execute(
|
||||||
delete_sql, {"workspace": self.db.workspace, "entity_name": entity_name}
|
delete_sql, {"workspace": self.workspace, "entity_name": entity_name}
|
||||||
|
)
|
||||||
|
logger.debug(
|
||||||
|
f"[{self.workspace}] Successfully deleted relations for entity {entity_name}"
|
||||||
)
|
)
|
||||||
logger.debug(f"Successfully deleted relations for entity {entity_name}")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error deleting relations for entity {entity_name}: {e}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Error deleting relations for entity {entity_name}: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
||||||
"""Get vector data by its ID
|
"""Get vector data by its ID
|
||||||
|
|
@ -2094,11 +2100,13 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
"""
|
"""
|
||||||
table_name = namespace_to_table_name(self.namespace)
|
table_name = namespace_to_table_name(self.namespace)
|
||||||
if not table_name:
|
if not table_name:
|
||||||
logger.error(f"Unknown namespace for ID lookup: {self.namespace}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Unknown namespace for ID lookup: {self.namespace}"
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
query = f"SELECT *, EXTRACT(EPOCH FROM create_time)::BIGINT as created_at FROM {table_name} WHERE workspace=$1 AND id=$2"
|
query = f"SELECT *, EXTRACT(EPOCH FROM create_time)::BIGINT as created_at FROM {table_name} WHERE workspace=$1 AND id=$2"
|
||||||
params = {"workspace": self.db.workspace, "id": id}
|
params = {"workspace": self.workspace, "id": id}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = await self.db.query(query, params)
|
result = await self.db.query(query, params)
|
||||||
|
|
@ -2106,7 +2114,9 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
return dict(result)
|
return dict(result)
|
||||||
return None
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error retrieving vector data for ID {id}: {e}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Error retrieving vector data for ID {id}: {e}"
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
|
||||||
|
|
@ -2123,18 +2133,22 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
|
|
||||||
table_name = namespace_to_table_name(self.namespace)
|
table_name = namespace_to_table_name(self.namespace)
|
||||||
if not table_name:
|
if not table_name:
|
||||||
logger.error(f"Unknown namespace for IDs lookup: {self.namespace}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Unknown namespace for IDs lookup: {self.namespace}"
|
||||||
|
)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
ids_str = ",".join([f"'{id}'" for id in ids])
|
ids_str = ",".join([f"'{id}'" for id in ids])
|
||||||
query = f"SELECT *, EXTRACT(EPOCH FROM create_time)::BIGINT as created_at FROM {table_name} WHERE workspace=$1 AND id IN ({ids_str})"
|
query = f"SELECT *, EXTRACT(EPOCH FROM create_time)::BIGINT as created_at FROM {table_name} WHERE workspace=$1 AND id IN ({ids_str})"
|
||||||
params = {"workspace": self.db.workspace}
|
params = {"workspace": self.workspace}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
results = await self.db.query(query, params, multirows=True)
|
results = await self.db.query(query, params, multirows=True)
|
||||||
return [dict(record) for record in results]
|
return [dict(record) for record in results]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error retrieving vector data for IDs {ids}: {e}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Error retrieving vector data for IDs {ids}: {e}"
|
||||||
|
)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def drop(self) -> dict[str, str]:
|
async def drop(self) -> dict[str, str]:
|
||||||
|
|
@ -2150,7 +2164,7 @@ class PGVectorStorage(BaseVectorStorage):
|
||||||
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
|
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
|
||||||
table_name=table_name
|
table_name=table_name
|
||||||
)
|
)
|
||||||
await self.db.execute(drop_sql, {"workspace": self.db.workspace})
|
await self.db.execute(drop_sql, {"workspace": self.workspace})
|
||||||
return {"status": "success", "message": "data dropped"}
|
return {"status": "success", "message": "data dropped"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
@ -2174,18 +2188,16 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
async def initialize(self):
|
async def initialize(self):
|
||||||
if self.db is None:
|
if self.db is None:
|
||||||
self.db = await ClientManager.get_client()
|
self.db = await ClientManager.get_client()
|
||||||
# Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
|
# Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
|
||||||
if self.db.workspace:
|
if self.db.workspace:
|
||||||
# Use PostgreSQLDB's workspace (highest priority)
|
# Use PostgreSQLDB's workspace (highest priority)
|
||||||
final_workspace = self.db.workspace
|
self.workspace = self.db.workspace
|
||||||
elif hasattr(self, "workspace") and self.workspace:
|
elif hasattr(self, "workspace") and self.workspace:
|
||||||
# Use storage class's workspace (medium priority)
|
# Use storage class's workspace (medium priority)
|
||||||
final_workspace = self.workspace
|
pass
|
||||||
self.db.workspace = final_workspace
|
else:
|
||||||
else:
|
# Use "default" for compatibility (lowest priority)
|
||||||
# Use "default" for compatibility (lowest priority)
|
self.workspace = "default"
|
||||||
final_workspace = "default"
|
|
||||||
self.db.workspace = final_workspace
|
|
||||||
|
|
||||||
async def finalize(self):
|
async def finalize(self):
|
||||||
if self.db is not None:
|
if self.db is not None:
|
||||||
|
|
@ -2198,7 +2210,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
table_name=namespace_to_table_name(self.namespace),
|
table_name=namespace_to_table_name(self.namespace),
|
||||||
ids=",".join([f"'{id}'" for id in keys]),
|
ids=",".join([f"'{id}'" for id in keys]),
|
||||||
)
|
)
|
||||||
params = {"workspace": self.db.workspace}
|
params = {"workspace": self.workspace}
|
||||||
try:
|
try:
|
||||||
res = await self.db.query(sql, params, multirows=True)
|
res = await self.db.query(sql, params, multirows=True)
|
||||||
if res:
|
if res:
|
||||||
|
|
@ -2211,13 +2223,13 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
return new_keys
|
return new_keys
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"PostgreSQL database,\nsql:{sql},\nparams:{params},\nerror:{e}"
|
f"[{self.workspace}] PostgreSQL database,\nsql:{sql},\nparams:{params},\nerror:{e}"
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
|
async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
|
||||||
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2"
|
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2"
|
||||||
params = {"workspace": self.db.workspace, "id": id}
|
params = {"workspace": self.workspace, "id": id}
|
||||||
result = await self.db.query(sql, params, True)
|
result = await self.db.query(sql, params, True)
|
||||||
if result is None or result == []:
|
if result is None or result == []:
|
||||||
return None
|
return None
|
||||||
|
|
@ -2262,7 +2274,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
sql = "SELECT * FROM LIGHTRAG_DOC_STATUS WHERE workspace=$1 AND id = ANY($2)"
|
sql = "SELECT * FROM LIGHTRAG_DOC_STATUS WHERE workspace=$1 AND id = ANY($2)"
|
||||||
params = {"workspace": self.db.workspace, "ids": ids}
|
params = {"workspace": self.workspace, "ids": ids}
|
||||||
|
|
||||||
results = await self.db.query(sql, params, True)
|
results = await self.db.query(sql, params, True)
|
||||||
|
|
||||||
|
|
@ -2315,7 +2327,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
FROM LIGHTRAG_DOC_STATUS
|
FROM LIGHTRAG_DOC_STATUS
|
||||||
where workspace=$1 GROUP BY STATUS
|
where workspace=$1 GROUP BY STATUS
|
||||||
"""
|
"""
|
||||||
result = await self.db.query(sql, {"workspace": self.db.workspace}, True)
|
result = await self.db.query(sql, {"workspace": self.workspace}, True)
|
||||||
counts = {}
|
counts = {}
|
||||||
for doc in result:
|
for doc in result:
|
||||||
counts[doc["status"]] = doc["count"]
|
counts[doc["status"]] = doc["count"]
|
||||||
|
|
@ -2326,7 +2338,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
) -> dict[str, DocProcessingStatus]:
|
) -> dict[str, DocProcessingStatus]:
|
||||||
"""all documents with a specific status"""
|
"""all documents with a specific status"""
|
||||||
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$2"
|
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$2"
|
||||||
params = {"workspace": self.db.workspace, "status": status.value}
|
params = {"workspace": self.workspace, "status": status.value}
|
||||||
result = await self.db.query(sql, params, True)
|
result = await self.db.query(sql, params, True)
|
||||||
|
|
||||||
docs_by_status = {}
|
docs_by_status = {}
|
||||||
|
|
@ -2380,7 +2392,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
) -> dict[str, DocProcessingStatus]:
|
) -> dict[str, DocProcessingStatus]:
|
||||||
"""Get all documents with a specific track_id"""
|
"""Get all documents with a specific track_id"""
|
||||||
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and track_id=$2"
|
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and track_id=$2"
|
||||||
params = {"workspace": self.db.workspace, "track_id": track_id}
|
params = {"workspace": self.workspace, "track_id": track_id}
|
||||||
result = await self.db.query(sql, params, True)
|
result = await self.db.query(sql, params, True)
|
||||||
|
|
||||||
docs_by_track_id = {}
|
docs_by_track_id = {}
|
||||||
|
|
@ -2468,7 +2480,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
|
|
||||||
# Build WHERE clause
|
# Build WHERE clause
|
||||||
where_clause = "WHERE workspace=$1"
|
where_clause = "WHERE workspace=$1"
|
||||||
params = {"workspace": self.db.workspace}
|
params = {"workspace": self.workspace}
|
||||||
param_count = 1
|
param_count = 1
|
||||||
|
|
||||||
if status_filter is not None:
|
if status_filter is not None:
|
||||||
|
|
@ -2550,7 +2562,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
WHERE workspace=$1
|
WHERE workspace=$1
|
||||||
GROUP BY status
|
GROUP BY status
|
||||||
"""
|
"""
|
||||||
params = {"workspace": self.db.workspace}
|
params = {"workspace": self.workspace}
|
||||||
result = await self.db.query(sql, params, True)
|
result = await self.db.query(sql, params, True)
|
||||||
|
|
||||||
counts = {}
|
counts = {}
|
||||||
|
|
@ -2582,20 +2594,22 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
|
|
||||||
table_name = namespace_to_table_name(self.namespace)
|
table_name = namespace_to_table_name(self.namespace)
|
||||||
if not table_name:
|
if not table_name:
|
||||||
logger.error(f"Unknown namespace for deletion: {self.namespace}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Unknown namespace for deletion: {self.namespace}"
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id = ANY($2)"
|
delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id = ANY($2)"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.db.execute(
|
await self.db.execute(delete_sql, {"workspace": self.workspace, "ids": ids})
|
||||||
delete_sql, {"workspace": self.db.workspace, "ids": ids}
|
|
||||||
)
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Successfully deleted {len(ids)} records from {self.namespace}"
|
f"[{self.workspace}] Successfully deleted {len(ids)} records from {self.namespace}"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while deleting records from {self.namespace}: {e}")
|
logger.error(
|
||||||
|
f"[{self.workspace}] Error while deleting records from {self.namespace}: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
||||||
"""Update or insert document status
|
"""Update or insert document status
|
||||||
|
|
@ -2603,7 +2617,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
Args:
|
Args:
|
||||||
data: dictionary of document IDs and their status data
|
data: dictionary of document IDs and their status data
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Inserting {len(data)} to {self.namespace}")
|
logger.debug(f"[{self.workspace}] Inserting {len(data)} to {self.namespace}")
|
||||||
if not data:
|
if not data:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -2629,7 +2643,9 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
# Convert to UTC and remove timezone info for storage
|
# Convert to UTC and remove timezone info for storage
|
||||||
return dt.astimezone(timezone.utc).replace(tzinfo=None)
|
return dt.astimezone(timezone.utc).replace(tzinfo=None)
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
logger.warning(f"Unable to parse datetime string: {dt_str}")
|
logger.warning(
|
||||||
|
f"[{self.workspace}] Unable to parse datetime string: {dt_str}"
|
||||||
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Modified SQL to include created_at, updated_at, chunks_list, track_id, metadata, and error_msg in both INSERT and UPDATE operations
|
# Modified SQL to include created_at, updated_at, chunks_list, track_id, metadata, and error_msg in both INSERT and UPDATE operations
|
||||||
|
|
@ -2657,7 +2673,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
await self.db.execute(
|
await self.db.execute(
|
||||||
sql,
|
sql,
|
||||||
{
|
{
|
||||||
"workspace": self.db.workspace,
|
"workspace": self.workspace,
|
||||||
"id": k,
|
"id": k,
|
||||||
"content_summary": v["content_summary"],
|
"content_summary": v["content_summary"],
|
||||||
"content_length": v["content_length"],
|
"content_length": v["content_length"],
|
||||||
|
|
@ -2688,7 +2704,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||||
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
|
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
|
||||||
table_name=table_name
|
table_name=table_name
|
||||||
)
|
)
|
||||||
await self.db.execute(drop_sql, {"workspace": self.db.workspace})
|
await self.db.execute(drop_sql, {"workspace": self.workspace})
|
||||||
return {"status": "success", "message": "data dropped"}
|
return {"status": "success", "message": "data dropped"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
@ -2732,7 +2748,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
Returns:
|
Returns:
|
||||||
str: The graph name for the current workspace
|
str: The graph name for the current workspace
|
||||||
"""
|
"""
|
||||||
workspace = getattr(self, "workspace", None)
|
workspace = self.workspace
|
||||||
namespace = self.namespace
|
namespace = self.namespace
|
||||||
|
|
||||||
if workspace and workspace.strip() and workspace.strip().lower() != "default":
|
if workspace and workspace.strip() and workspace.strip().lower() != "default":
|
||||||
|
|
@ -2741,7 +2757,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
|
safe_namespace = re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
|
||||||
return f"{safe_workspace}_{safe_namespace}"
|
return f"{safe_workspace}_{safe_namespace}"
|
||||||
else:
|
else:
|
||||||
# When workspace is empty or "default", use namespace directly
|
# When the workspace is "default", use the namespace directly (for backward compatibility with legacy implementations)
|
||||||
return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
|
return re.sub(r"[^a-zA-Z0-9_]", "_", namespace)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -2764,64 +2780,64 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
async def initialize(self):
|
async def initialize(self):
|
||||||
if self.db is None:
|
if self.db is None:
|
||||||
self.db = await ClientManager.get_client()
|
self.db = await ClientManager.get_client()
|
||||||
# Implement workspace priority: PostgreSQLDB.workspace > self.workspace > None
|
# Implement workspace priority: PostgreSQLDB.workspace > self.workspace > "default"
|
||||||
if self.db.workspace:
|
if self.db.workspace:
|
||||||
# Use PostgreSQLDB's workspace (highest priority)
|
# Use PostgreSQLDB's workspace (highest priority)
|
||||||
final_workspace = self.db.workspace
|
self.workspace = self.db.workspace
|
||||||
elif hasattr(self, "workspace") and self.workspace:
|
elif hasattr(self, "workspace") and self.workspace:
|
||||||
# Use storage class's workspace (medium priority)
|
# Use storage class's workspace (medium priority)
|
||||||
final_workspace = self.workspace
|
pass
|
||||||
self.db.workspace = final_workspace
|
else:
|
||||||
else:
|
# Use "default" for compatibility (lowest priority)
|
||||||
# Use None for compatibility (lowest priority)
|
self.workspace = "default"
|
||||||
final_workspace = None
|
|
||||||
self.db.workspace = final_workspace
|
|
||||||
|
|
||||||
# Dynamically generate graph name based on workspace
|
# Dynamically generate graph name based on workspace
|
||||||
self.workspace = self.db.workspace
|
|
||||||
self.graph_name = self._get_workspace_graph_name()
|
self.graph_name = self._get_workspace_graph_name()
|
||||||
|
|
||||||
# Log the graph initialization for debugging
|
# Log the graph initialization for debugging
|
||||||
logger.info(
|
logger.info(
|
||||||
f"PostgreSQL Graph initialized: workspace='{self.workspace}', graph_name='{self.graph_name}'"
|
f"[{self.workspace}] PostgreSQL Graph initialized: graph_name='{self.graph_name}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create AGE extension and configure graph environment once at initialization
|
# Use graph database lock to ensure atomic operations and prevent deadlocks
|
||||||
async with self.db.pool.acquire() as connection:
|
graph_db_lock = get_graph_db_lock(enable_logging=False)
|
||||||
# First ensure AGE extension is created
|
async with graph_db_lock:
|
||||||
await PostgreSQLDB.configure_age_extension(connection)
|
# Create AGE extension and configure graph environment once at initialization
|
||||||
|
async with self.db.pool.acquire() as connection:
|
||||||
|
# First ensure AGE extension is created
|
||||||
|
await PostgreSQLDB.configure_age_extension(connection)
|
||||||
|
|
||||||
# Execute each statement separately and ignore errors
|
# Execute each statement separately and ignore errors
|
||||||
queries = [
|
queries = [
|
||||||
f"SELECT create_graph('{self.graph_name}')",
|
f"SELECT create_graph('{self.graph_name}')",
|
||||||
f"SELECT create_vlabel('{self.graph_name}', 'base');",
|
f"SELECT create_vlabel('{self.graph_name}', 'base');",
|
||||||
f"SELECT create_elabel('{self.graph_name}', 'DIRECTED');",
|
f"SELECT create_elabel('{self.graph_name}', 'DIRECTED');",
|
||||||
# f'CREATE INDEX CONCURRENTLY vertex_p_idx ON {self.graph_name}."_ag_label_vertex" (id)',
|
# f'CREATE INDEX CONCURRENTLY vertex_p_idx ON {self.graph_name}."_ag_label_vertex" (id)',
|
||||||
f'CREATE INDEX CONCURRENTLY vertex_idx_node_id ON {self.graph_name}."_ag_label_vertex" (ag_catalog.agtype_access_operator(properties, \'"entity_id"\'::agtype))',
|
f'CREATE INDEX CONCURRENTLY vertex_idx_node_id ON {self.graph_name}."_ag_label_vertex" (ag_catalog.agtype_access_operator(properties, \'"entity_id"\'::agtype))',
|
||||||
# f'CREATE INDEX CONCURRENTLY edge_p_idx ON {self.graph_name}."_ag_label_edge" (id)',
|
# f'CREATE INDEX CONCURRENTLY edge_p_idx ON {self.graph_name}."_ag_label_edge" (id)',
|
||||||
f'CREATE INDEX CONCURRENTLY edge_sid_idx ON {self.graph_name}."_ag_label_edge" (start_id)',
|
f'CREATE INDEX CONCURRENTLY edge_sid_idx ON {self.graph_name}."_ag_label_edge" (start_id)',
|
||||||
f'CREATE INDEX CONCURRENTLY edge_eid_idx ON {self.graph_name}."_ag_label_edge" (end_id)',
|
f'CREATE INDEX CONCURRENTLY edge_eid_idx ON {self.graph_name}."_ag_label_edge" (end_id)',
|
||||||
f'CREATE INDEX CONCURRENTLY edge_seid_idx ON {self.graph_name}."_ag_label_edge" (start_id,end_id)',
|
f'CREATE INDEX CONCURRENTLY edge_seid_idx ON {self.graph_name}."_ag_label_edge" (start_id,end_id)',
|
||||||
f'CREATE INDEX CONCURRENTLY directed_p_idx ON {self.graph_name}."DIRECTED" (id)',
|
f'CREATE INDEX CONCURRENTLY directed_p_idx ON {self.graph_name}."DIRECTED" (id)',
|
||||||
f'CREATE INDEX CONCURRENTLY directed_eid_idx ON {self.graph_name}."DIRECTED" (end_id)',
|
f'CREATE INDEX CONCURRENTLY directed_eid_idx ON {self.graph_name}."DIRECTED" (end_id)',
|
||||||
f'CREATE INDEX CONCURRENTLY directed_sid_idx ON {self.graph_name}."DIRECTED" (start_id)',
|
f'CREATE INDEX CONCURRENTLY directed_sid_idx ON {self.graph_name}."DIRECTED" (start_id)',
|
||||||
f'CREATE INDEX CONCURRENTLY directed_seid_idx ON {self.graph_name}."DIRECTED" (start_id,end_id)',
|
f'CREATE INDEX CONCURRENTLY directed_seid_idx ON {self.graph_name}."DIRECTED" (start_id,end_id)',
|
||||||
f'CREATE INDEX CONCURRENTLY entity_p_idx ON {self.graph_name}."base" (id)',
|
f'CREATE INDEX CONCURRENTLY entity_p_idx ON {self.graph_name}."base" (id)',
|
||||||
f'CREATE INDEX CONCURRENTLY entity_idx_node_id ON {self.graph_name}."base" (ag_catalog.agtype_access_operator(properties, \'"entity_id"\'::agtype))',
|
f'CREATE INDEX CONCURRENTLY entity_idx_node_id ON {self.graph_name}."base" (ag_catalog.agtype_access_operator(properties, \'"entity_id"\'::agtype))',
|
||||||
f'CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON {self.graph_name}."base" using gin(properties)',
|
f'CREATE INDEX CONCURRENTLY entity_node_id_gin_idx ON {self.graph_name}."base" using gin(properties)',
|
||||||
f'ALTER TABLE {self.graph_name}."DIRECTED" CLUSTER ON directed_sid_idx',
|
f'ALTER TABLE {self.graph_name}."DIRECTED" CLUSTER ON directed_sid_idx',
|
||||||
]
|
]
|
||||||
|
|
||||||
for query in queries:
|
for query in queries:
|
||||||
# Use the new flag to silently ignore "already exists" errors
|
# Use the new flag to silently ignore "already exists" errors
|
||||||
# at the source, preventing log spam.
|
# at the source, preventing log spam.
|
||||||
await self.db.execute(
|
await self.db.execute(
|
||||||
query,
|
query,
|
||||||
upsert=True,
|
upsert=True,
|
||||||
ignore_if_exists=True, # Pass the new flag
|
ignore_if_exists=True, # Pass the new flag
|
||||||
with_age=True,
|
with_age=True,
|
||||||
graph_name=self.graph_name,
|
graph_name=self.graph_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def finalize(self):
|
async def finalize(self):
|
||||||
if self.db is not None:
|
if self.db is not None:
|
||||||
|
|
@ -3067,7 +3083,9 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
try:
|
try:
|
||||||
node_dict = json.loads(node_dict)
|
node_dict = json.loads(node_dict)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(f"Failed to parse node string: {node_dict}")
|
logger.warning(
|
||||||
|
f"[{self.workspace}] Failed to parse node string: {node_dict}"
|
||||||
|
)
|
||||||
|
|
||||||
return node_dict
|
return node_dict
|
||||||
return None
|
return None
|
||||||
|
|
@ -3122,7 +3140,9 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
try:
|
try:
|
||||||
result = json.loads(result)
|
result = json.loads(result)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(f"Failed to parse edge string: {result}")
|
logger.warning(
|
||||||
|
f"[{self.workspace}] Failed to parse edge string: {result}"
|
||||||
|
)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -3188,7 +3208,9 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
await self._query(query, readonly=False, upsert=True)
|
await self._query(query, readonly=False, upsert=True)
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.error(f"POSTGRES, upsert_node error on node_id: `{node_id}`")
|
logger.error(
|
||||||
|
f"[{self.workspace}] POSTGRES, upsert_node error on node_id: `{node_id}`"
|
||||||
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@retry(
|
@retry(
|
||||||
|
|
@ -3232,7 +3254,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"POSTGRES, upsert_edge error on edge: `{source_node_id}`-`{target_node_id}`"
|
f"[{self.workspace}] POSTGRES, upsert_edge error on edge: `{source_node_id}`-`{target_node_id}`"
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
@ -3253,7 +3275,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
try:
|
try:
|
||||||
await self._query(query, readonly=False)
|
await self._query(query, readonly=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error during node deletion: {%s}", e)
|
logger.error(f"[{self.workspace}] Error during node deletion: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def remove_nodes(self, node_ids: list[str]) -> None:
|
async def remove_nodes(self, node_ids: list[str]) -> None:
|
||||||
|
|
@ -3275,7 +3297,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
try:
|
try:
|
||||||
await self._query(query, readonly=False)
|
await self._query(query, readonly=False)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Error during node removal: {%s}", e)
|
logger.error(f"[{self.workspace}] Error during node removal: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def remove_edges(self, edges: list[tuple[str, str]]) -> None:
|
async def remove_edges(self, edges: list[tuple[str, str]]) -> None:
|
||||||
|
|
@ -3296,9 +3318,11 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self._query(query, readonly=False)
|
await self._query(query, readonly=False)
|
||||||
logger.debug(f"Deleted edge from '{source}' to '{target}'")
|
logger.debug(
|
||||||
|
f"[{self.workspace}] Deleted edge from '{source}' to '{target}'"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error during edge deletion: {str(e)}")
|
logger.error(f"[{self.workspace}] Error during edge deletion: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
async def get_nodes_batch(self, node_ids: list[str]) -> dict[str, dict]:
|
async def get_nodes_batch(self, node_ids: list[str]) -> dict[str, dict]:
|
||||||
|
|
@ -3339,7 +3363,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
node_dict = json.loads(node_dict)
|
node_dict = json.loads(node_dict)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to parse node string in batch: {node_dict}"
|
f"[{self.workspace}] Failed to parse node string in batch: {node_dict}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove the 'base' label if present in a 'labels' property
|
# Remove the 'base' label if present in a 'labels' property
|
||||||
|
|
@ -3502,7 +3526,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
edge_props = json.loads(edge_props)
|
edge_props = json.loads(edge_props)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to parse edge properties string: {edge_props}"
|
f"[{self.workspace}] Failed to parse edge properties string: {edge_props}"
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -3518,7 +3542,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
edge_props = json.loads(edge_props)
|
edge_props = json.loads(edge_props)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to parse edge properties string: {edge_props}"
|
f"[{self.workspace}] Failed to parse edge properties string: {edge_props}"
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -3640,7 +3664,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
node_dict = json.loads(node_dict)
|
node_dict = json.loads(node_dict)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to parse node string in batch: {node_dict}"
|
f"[{self.workspace}] Failed to parse node string in batch: {node_dict}"
|
||||||
)
|
)
|
||||||
|
|
||||||
node_dict["id"] = node_dict["entity_id"]
|
node_dict["id"] = node_dict["entity_id"]
|
||||||
|
|
@ -3675,7 +3699,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
edge_agtype = json.loads(edge_agtype)
|
edge_agtype = json.loads(edge_agtype)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to parse edge string in batch: {edge_agtype}"
|
f"[{self.workspace}] Failed to parse edge string in batch: {edge_agtype}"
|
||||||
)
|
)
|
||||||
|
|
||||||
source_agtype = item["source"]["properties"]
|
source_agtype = item["source"]["properties"]
|
||||||
|
|
@ -3685,7 +3709,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
source_agtype = json.loads(source_agtype)
|
source_agtype = json.loads(source_agtype)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to parse node string in batch: {source_agtype}"
|
f"[{self.workspace}] Failed to parse node string in batch: {source_agtype}"
|
||||||
)
|
)
|
||||||
|
|
||||||
target_agtype = item["target"]["properties"]
|
target_agtype = item["target"]["properties"]
|
||||||
|
|
@ -3695,7 +3719,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
target_agtype = json.loads(target_agtype)
|
target_agtype = json.loads(target_agtype)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to parse node string in batch: {target_agtype}"
|
f"[{self.workspace}] Failed to parse node string in batch: {target_agtype}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if edge_agtype and source_agtype and target_agtype:
|
if edge_agtype and source_agtype and target_agtype:
|
||||||
|
|
@ -3964,7 +3988,9 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
|
|
||||||
node_ids = [str(result["node_id"]) for result in node_results]
|
node_ids = [str(result["node_id"]) for result in node_results]
|
||||||
|
|
||||||
logger.info(f"Total nodes: {total_nodes}, Selected nodes: {len(node_ids)}")
|
logger.info(
|
||||||
|
f"[{self.workspace}] Total nodes: {total_nodes}, Selected nodes: {len(node_ids)}"
|
||||||
|
)
|
||||||
|
|
||||||
if node_ids:
|
if node_ids:
|
||||||
formatted_ids = ", ".join(node_ids)
|
formatted_ids = ", ".join(node_ids)
|
||||||
|
|
@ -4028,13 +4054,13 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
kg = await self._bfs_subgraph(node_label, max_depth, max_nodes)
|
kg = await self._bfs_subgraph(node_label, max_depth, max_nodes)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Subgraph query successful | Node count: {len(kg.nodes)} | Edge count: {len(kg.edges)}"
|
f"[{self.workspace}] Subgraph query successful | Node count: {len(kg.nodes)} | Edge count: {len(kg.edges)}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# For non-wildcard queries, use the BFS algorithm
|
# For non-wildcard queries, use the BFS algorithm
|
||||||
kg = await self._bfs_subgraph(node_label, max_depth, max_nodes)
|
kg = await self._bfs_subgraph(node_label, max_depth, max_nodes)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Subgraph query for '{node_label}' successful | Node count: {len(kg.nodes)} | Edge count: {len(kg.edges)}"
|
f"[{self.workspace}] Subgraph query for '{node_label}' successful | Node count: {len(kg.nodes)} | Edge count: {len(kg.edges)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
return kg
|
return kg
|
||||||
|
|
@ -4061,7 +4087,9 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
try:
|
try:
|
||||||
node_dict = json.loads(node_dict)
|
node_dict = json.loads(node_dict)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(f"Failed to parse node string: {node_dict}")
|
logger.warning(
|
||||||
|
f"[{self.workspace}] Failed to parse node string: {node_dict}"
|
||||||
|
)
|
||||||
|
|
||||||
# Add node id (entity_id) to the dictionary for easier access
|
# Add node id (entity_id) to the dictionary for easier access
|
||||||
node_dict["id"] = node_dict.get("entity_id")
|
node_dict["id"] = node_dict.get("entity_id")
|
||||||
|
|
@ -4091,7 +4119,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
edge_properties = json.loads(edge_properties)
|
edge_properties = json.loads(edge_properties)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to parse edge properties string: {edge_properties}"
|
f"[{self.workspace}] Failed to parse edge properties string: {edge_properties}"
|
||||||
)
|
)
|
||||||
edge_properties = {}
|
edge_properties = {}
|
||||||
|
|
||||||
|
|
@ -4114,7 +4142,7 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
"message": f"workspace '{self.workspace}' graph data dropped",
|
"message": f"workspace '{self.workspace}' graph data dropped",
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error dropping graph: {e}")
|
logger.error(f"[{self.workspace}] Error dropping graph: {e}")
|
||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue