From db39a439759a341de39511a3fc3c958131cfa938 Mon Sep 17 00:00:00 2001
From: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Date: Mon, 29 Sep 2025 13:50:11 +0200
Subject: [PATCH] fix: Resolve schema migration for Neo4j (#1482)

## Description

Fix the Neo4j failure that occurs when migrating a relational DB schema.
Nested schema fields (`columns`, `foreign_keys`, `sample_rows`, `tables`,
`sample_data`) are now serialized to JSON strings before being stored, and
`extraction_timestamp` is removed from `DatabaseSchema`, so every schema data
point carries only flat property values.

## Type of Change

- [x] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Pre-submission Checklist

- [x] **I have tested my changes thoroughly before submitting this PR**
- [x] **This PR contains minimal changes necessary to address the issue/feature**
- [x] My code follows the project's coding standards and style guidelines
- [x] I have added tests that prove my fix is effective or that my feature works
- [x] I have added necessary documentation (if applicable)
- [x] All new and existing tests pass
- [x] I have searched existing PRs to ensure this change hasn't been submitted already
- [x] I have linked any relevant issues in the description
- [x] My commits have clear and descriptive messages

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the
terms of the Topoteretes Developer Certificate of Origin.
--- cognee/tasks/schema/ingest_database_schema.py | 12 ++++++------ cognee/tasks/schema/models.py | 11 +++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/cognee/tasks/schema/ingest_database_schema.py b/cognee/tasks/schema/ingest_database_schema.py index e3823701c..2b1644067 100644 --- a/cognee/tasks/schema/ingest_database_schema.py +++ b/cognee/tasks/schema/ingest_database_schema.py @@ -1,3 +1,4 @@ +import json from typing import List, Dict from uuid import uuid5, NAMESPACE_OID from cognee.infrastructure.engine.models.DataPoint import DataPoint @@ -79,10 +80,10 @@ async def ingest_database_schema( schema_table = SchemaTable( id=uuid5(NAMESPACE_OID, name=f"{table_name}"), name=table_name, - columns=details["columns"], + columns=json.dumps(details["columns"], default=str), primary_key=details.get("primary_key"), - foreign_keys=details.get("foreign_keys", []), - sample_rows=rows, + foreign_keys=json.dumps(details.get("foreign_keys", []), default=str), + sample_rows=json.dumps(rows, default=str), row_count_estimate=row_count_estimate, description=f"Relational database table with '{table_name}' with {len(details['columns'])} columns and approx. {row_count_estimate} rows." f"Here are the columns this table contains: {details['columns']}" @@ -119,9 +120,8 @@ async def ingest_database_schema( id=uuid5(NAMESPACE_OID, name=id_str), name=migration_config.migration_db_name, database_type=migration_config.migration_db_provider, - tables=tables, - sample_data=sample_data, - extraction_timestamp=datetime.now(timezone.utc), + tables=json.dumps(tables, default=str), + sample_data=json.dumps(sample_data, default=str), description=f"Database schema containing {len(schema_tables)} tables and {len(schema_relationships)} relationships. " f"The database type is {migration_config.migration_db_provider}." 
f"The database contains the following tables: {tables}", diff --git a/cognee/tasks/schema/models.py b/cognee/tasks/schema/models.py index 4b13f420b..54b217c48 100644 --- a/cognee/tasks/schema/models.py +++ b/cognee/tasks/schema/models.py @@ -8,9 +8,8 @@ class DatabaseSchema(DataPoint): name: str database_type: str # sqlite, postgres, etc. - tables: Dict[str, Dict] # Reuse existing schema format from SqlAlchemyAdapter - sample_data: Dict[str, List[Dict]] # Limited examples per table - extraction_timestamp: datetime + tables: str # Reuse existing schema format from SqlAlchemyAdapter + sample_data: str # Limited examples per table description: str metadata: dict = {"index_fields": ["description", "name"]} @@ -19,10 +18,10 @@ class SchemaTable(DataPoint): """Represents an individual table schema with relationships""" name: str - columns: List[Dict] # Column definitions with types + columns: str # Column definitions with types primary_key: Optional[str] - foreign_keys: List[Dict] # Foreign key relationships - sample_rows: List[Dict] # Max 3-5 example rows + foreign_keys: str # Foreign key relationships + sample_rows: str # Max 3-5 example rows row_count_estimate: Optional[int] # Actual table size description: str metadata: dict = {"index_fields": ["description", "name"]}