# --- cognee/tasks/schema/ingest_database_schema.py ---
from typing import Dict, List, Optional

from cognee.infrastructure.engine.models.DataPoint import DataPoint


async def ingest_database_schema(
    database_config: Dict,
    schema_name: str = "default",
    max_sample_rows: int = 5,
    node_set: Optional[List[str]] = None,
) -> List[DataPoint]:
    """
    Ingest database schema with sample data into dedicated nodeset

    Args:
        database_config: Database connection configuration
        schema_name: Name identifier for this schema
        max_sample_rows: Maximum sample rows per table
        node_set: Target nodeset; ``None`` (the default) selects
            ["database_schema"]

    Returns:
        List of created DataPoint objects. The ingestion logic is not
        implemented yet, so the list is currently always empty.
    """
    # A list literal in the signature is evaluated once and shared by every
    # call; resolving the default here gives each call its own list.
    if node_set is None:
        node_set = ["database_schema"]

    # TODO: implement schema extraction. Until then return an empty list so
    # the result still matches the declared return type instead of None.
    return []


# --- cognee/tasks/schema/models.py ---
from datetime import datetime
from typing import Dict, List, Optional

from cognee.infrastructure.engine.models.DataPoint import DataPoint


class DatabaseSchema(DataPoint):
    """Represents a complete database schema with sample data"""

    schema_name: str
    database_type: str  # sqlite, postgres, etc.
    tables: Dict[str, Dict]  # Reuse existing schema format from SqlAlchemyAdapter
    sample_data: Dict[str, List[Dict]]  # Limited examples per table
    extraction_timestamp: datetime
    # NOTE(review): "index_fields" presumably tells DataPoint which fields to
    # index -- confirm against the DataPoint base class.
    metadata: dict = {"index_fields": ["schema_name", "database_type"]}


class SchemaTable(DataPoint):
    """Represents an individual table schema with relationships"""

    table_name: str
    schema_name: str
    columns: List[Dict]  # Column definitions with types
    primary_key: Optional[str]
    foreign_keys: List[Dict]  # Foreign key relationships
    sample_rows: List[Dict]  # Max 3-5 example rows
    row_count_estimate: Optional[int]  # Actual table size
    # NOTE(review): "index_fields" presumably tells DataPoint which fields to
    # index -- confirm against the DataPoint base class.
    metadata: dict = {"index_fields": ["table_name", "schema_name"]}


class SchemaRelationship(DataPoint):
    """Represents relationships between tables"""

    source_table: str
    target_table: str
    relationship_type: str  # "foreign_key", "one_to_many", etc.
    source_column: str
    target_column: str
    # NOTE(review): "index_fields" presumably tells DataPoint which fields to
    # index -- confirm against the DataPoint base class.
    metadata: dict = {"index_fields": ["source_table", "target_table"]}