fix(postgres): improve AGE agtype parsing and simplify error logging
- Fix JSON parsing errors caused by `::` characters in data content
- Implement precise agtype string parsing using `rfind()` to separate JSON content from type identifiers
- Add robust error handling for malformed JSON in graph data
This commit is contained in:
parent
83c8691221
commit
f033fd6f87
1 changed files with 77 additions and 29 deletions
|
|
@ -1905,53 +1905,101 @@ class PGGraphStorage(BaseGraphStorage):
|
||||||
the dictionary key is the field name and the value is the
|
the dictionary key is the field name and the value is the
|
||||||
value converted to a python type
|
value converted to a python type
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def parse_agtype_string(agtype_str: str) -> tuple[str, str]:
|
||||||
|
"""
|
||||||
|
Parse agtype string precisely, separating JSON content and type identifier
|
||||||
|
|
||||||
|
Args:
|
||||||
|
agtype_str: String like '{"json": "content"}::vertex'
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(json_content, type_identifier)
|
||||||
|
"""
|
||||||
|
if not isinstance(agtype_str, str) or "::" not in agtype_str:
|
||||||
|
return agtype_str, ""
|
||||||
|
|
||||||
|
# Find the last :: from the right, which is the start of type identifier
|
||||||
|
last_double_colon = agtype_str.rfind("::")
|
||||||
|
|
||||||
|
if last_double_colon == -1:
|
||||||
|
return agtype_str, ""
|
||||||
|
|
||||||
|
# Separate JSON content and type identifier
|
||||||
|
json_content = agtype_str[:last_double_colon]
|
||||||
|
type_identifier = agtype_str[last_double_colon + 2 :]
|
||||||
|
|
||||||
|
return json_content, type_identifier
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def safe_json_parse(json_str: str, context: str = "") -> dict:
|
||||||
|
"""
|
||||||
|
Safe JSON parsing with simplified error logging
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return json.loads(json_str)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.error(f"JSON parsing failed ({context}): {e}")
|
||||||
|
logger.error(f"Raw data (first 100 chars): {repr(json_str[:100])}")
|
||||||
|
logger.error(f"Error position: line {e.lineno}, column {e.colno}")
|
||||||
|
return None
|
||||||
|
|
||||||
# result holder
|
# result holder
|
||||||
d = {}
|
d = {}
|
||||||
|
|
||||||
# prebuild a mapping of vertex_id to vertex mappings to be used
|
# prebuild a mapping of vertex_id to vertex mappings to be used
|
||||||
# later to build edges
|
# later to build edges
|
||||||
vertices = {}
|
vertices = {}
|
||||||
|
|
||||||
|
# First pass: preprocess vertices
|
||||||
for k in record.keys():
|
for k in record.keys():
|
||||||
v = record[k]
|
v = record[k]
|
||||||
# agtype comes back '{key: value}::type' which must be parsed
|
|
||||||
if isinstance(v, str) and "::" in v:
|
if isinstance(v, str) and "::" in v:
|
||||||
if v.startswith("[") and v.endswith("]"):
|
if v.startswith("[") and v.endswith("]"):
|
||||||
if "::vertex" not in v:
|
# Handle vertex arrays
|
||||||
continue
|
json_content, type_id = parse_agtype_string(v)
|
||||||
v = v.replace("::vertex", "")
|
if type_id == "vertex":
|
||||||
vertexes = json.loads(v)
|
vertexes = safe_json_parse(
|
||||||
for vertex in vertexes:
|
json_content, f"vertices array for {k}"
|
||||||
vertices[vertex["id"]] = vertex.get("properties")
|
)
|
||||||
|
if vertexes:
|
||||||
|
for vertex in vertexes:
|
||||||
|
vertices[vertex["id"]] = vertex.get("properties")
|
||||||
else:
|
else:
|
||||||
dtype = v.split("::")[-1]
|
# Handle single vertex
|
||||||
v = v.split("::")[0]
|
json_content, type_id = parse_agtype_string(v)
|
||||||
if dtype == "vertex":
|
if type_id == "vertex":
|
||||||
vertex = json.loads(v)
|
vertex = safe_json_parse(json_content, f"single vertex for {k}")
|
||||||
vertices[vertex["id"]] = vertex.get("properties")
|
if vertex:
|
||||||
|
vertices[vertex["id"]] = vertex.get("properties")
|
||||||
|
|
||||||
# iterate returned fields and parse appropriately
|
# Second pass: process all fields
|
||||||
for k in record.keys():
|
for k in record.keys():
|
||||||
v = record[k]
|
v = record[k]
|
||||||
if isinstance(v, str) and "::" in v:
|
if isinstance(v, str) and "::" in v:
|
||||||
if v.startswith("[") and v.endswith("]"):
|
if v.startswith("[") and v.endswith("]"):
|
||||||
if "::vertex" in v:
|
# Handle array types
|
||||||
v = v.replace("::vertex", "")
|
json_content, type_id = parse_agtype_string(v)
|
||||||
d[k] = json.loads(v)
|
if type_id in ["vertex", "edge"]:
|
||||||
|
parsed_data = safe_json_parse(
|
||||||
elif "::edge" in v:
|
json_content, f"array {type_id} for field {k}"
|
||||||
v = v.replace("::edge", "")
|
)
|
||||||
d[k] = json.loads(v)
|
d[k] = parsed_data if parsed_data is not None else None
|
||||||
else:
|
else:
|
||||||
print("WARNING: unsupported type")
|
logger.warning(f"Unknown array type: {type_id}")
|
||||||
continue
|
d[k] = None
|
||||||
|
|
||||||
else:
|
else:
|
||||||
dtype = v.split("::")[-1]
|
# Handle single objects
|
||||||
v = v.split("::")[0]
|
json_content, type_id = parse_agtype_string(v)
|
||||||
if dtype == "vertex":
|
if type_id in ["vertex", "edge"]:
|
||||||
d[k] = json.loads(v)
|
parsed_data = safe_json_parse(
|
||||||
elif dtype == "edge":
|
json_content, f"single {type_id} for field {k}"
|
||||||
d[k] = json.loads(v)
|
)
|
||||||
|
d[k] = parsed_data if parsed_data is not None else None
|
||||||
|
else:
|
||||||
|
# May be other types of agtype data, keep as is
|
||||||
|
d[k] = v
|
||||||
else:
|
else:
|
||||||
d[k] = v # Keep as string
|
d[k] = v # Keep as string
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue