From 15b7b8ef2b10316c1e28799e35fa2cb60e911e0b Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 14:54:35 +0100 Subject: [PATCH 1/4] fix: Resolve issue with table names in SQL commands Some SQL commands require lowercase characters in table names unless table name is wrapped in quotes. Renamed all new tables to use lowercase Fix COG-677 --- cognee/infrastructure/engine/models/DataPoint.py | 1 + cognee/modules/chunking/models/DocumentChunk.py | 1 + cognee/modules/engine/models/Entity.py | 1 + cognee/modules/engine/models/EntityType.py | 1 + cognee/tasks/chunks/query_chunks.py | 2 +- cognee/tasks/graph/query_graph_connections.py | 4 ++-- cognee/tasks/storage/index_data_points.py | 4 ++-- cognee/tasks/summarization/models.py | 1 + cognee/tasks/summarization/query_summaries.py | 2 +- cognee/tests/test_pgvector.py | 2 +- 10 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cognee/infrastructure/engine/models/DataPoint.py b/cognee/infrastructure/engine/models/DataPoint.py index 337306cb6..f8ea1c9f0 100644 --- a/cognee/infrastructure/engine/models/DataPoint.py +++ b/cognee/infrastructure/engine/models/DataPoint.py @@ -8,6 +8,7 @@ class MetaData(TypedDict): index_fields: list[str] class DataPoint(BaseModel): + __tablename__ = "data_point" id: UUID = Field(default_factory = uuid4) updated_at: Optional[datetime] = datetime.now(timezone.utc) _metadata: Optional[MetaData] = { diff --git a/cognee/modules/chunking/models/DocumentChunk.py b/cognee/modules/chunking/models/DocumentChunk.py index 975edb27e..b5b1cef94 100644 --- a/cognee/modules/chunking/models/DocumentChunk.py +++ b/cognee/modules/chunking/models/DocumentChunk.py @@ -3,6 +3,7 @@ from cognee.infrastructure.engine import DataPoint from cognee.modules.data.processing.document_types import Document class DocumentChunk(DataPoint): + __tablename__ = "document_chunk" text: str word_count: int chunk_index: int diff --git a/cognee/modules/engine/models/Entity.py 
b/cognee/modules/engine/models/Entity.py index c43774e38..cf946ceb6 100644 --- a/cognee/modules/engine/models/Entity.py +++ b/cognee/modules/engine/models/Entity.py @@ -3,6 +3,7 @@ from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from .EntityType import EntityType class Entity(DataPoint): + __tablename__ = "entity" name: str is_a: EntityType description: str diff --git a/cognee/modules/engine/models/EntityType.py b/cognee/modules/engine/models/EntityType.py index b4f495857..56092f261 100644 --- a/cognee/modules/engine/models/EntityType.py +++ b/cognee/modules/engine/models/EntityType.py @@ -2,6 +2,7 @@ from cognee.infrastructure.engine import DataPoint from cognee.modules.chunking.models.DocumentChunk import DocumentChunk class EntityType(DataPoint): + __tablename__ = "entity_type" name: str type: str description: str diff --git a/cognee/tasks/chunks/query_chunks.py b/cognee/tasks/chunks/query_chunks.py index 93f32a640..399528ee9 100644 --- a/cognee/tasks/chunks/query_chunks.py +++ b/cognee/tasks/chunks/query_chunks.py @@ -10,7 +10,7 @@ async def query_chunks(query: str) -> list[dict]: """ vector_engine = get_vector_engine() - found_chunks = await vector_engine.search("DocumentChunk_text", query, limit = 5) + found_chunks = await vector_engine.search("document_chunk_text", query, limit = 5) chunks = [result.payload for result in found_chunks] diff --git a/cognee/tasks/graph/query_graph_connections.py b/cognee/tasks/graph/query_graph_connections.py index cd4d76a5e..4020ddd13 100644 --- a/cognee/tasks/graph/query_graph_connections.py +++ b/cognee/tasks/graph/query_graph_connections.py @@ -27,8 +27,8 @@ async def query_graph_connections(query: str, exploration_levels = 1) -> list[(s else: vector_engine = get_vector_engine() results = await asyncio.gather( - vector_engine.search("Entity_name", query_text = query, limit = 5), - vector_engine.search("EntityType_name", query_text = query, limit = 5), + vector_engine.search("entity_name", query_text = 
query, limit = 5), + vector_engine.search("entity_type_name", query_text = query, limit = 5), ) results = [*results[0], *results[1]] relevant_results = [result for result in results if result.score < 0.5][:5] diff --git a/cognee/tasks/storage/index_data_points.py b/cognee/tasks/storage/index_data_points.py index dc74d705d..12903173a 100644 --- a/cognee/tasks/storage/index_data_points.py +++ b/cognee/tasks/storage/index_data_points.py @@ -16,10 +16,10 @@ async def index_data_points(data_points: list[DataPoint]): data_point_type = type(data_point) for field_name in data_point._metadata["index_fields"]: - index_name = f"{data_point_type.__name__}.{field_name}" + index_name = f"{data_point_type.__tablename__}.{field_name}" if index_name not in created_indexes: - await vector_engine.create_vector_index(data_point_type.__name__, field_name) + await vector_engine.create_vector_index(data_point_type.__tablename__, field_name) created_indexes[index_name] = True if index_name not in index_points: diff --git a/cognee/tasks/summarization/models.py b/cognee/tasks/summarization/models.py index c6a932b37..955c0e2fa 100644 --- a/cognee/tasks/summarization/models.py +++ b/cognee/tasks/summarization/models.py @@ -3,6 +3,7 @@ from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.data.processing.document_types import Document class TextSummary(DataPoint): + __tablename__ = "text_summary" text: str made_from: DocumentChunk diff --git a/cognee/tasks/summarization/query_summaries.py b/cognee/tasks/summarization/query_summaries.py index 896839143..d9ec0fa00 100644 --- a/cognee/tasks/summarization/query_summaries.py +++ b/cognee/tasks/summarization/query_summaries.py @@ -10,7 +10,7 @@ async def query_summaries(query: str) -> list: """ vector_engine = get_vector_engine() - summaries_results = await vector_engine.search("TextSummary_text", query, limit = 5) + summaries_results = await vector_engine.search("text_summary_text", query, limit = 5) summaries = 
[summary.payload for summary in summaries_results] diff --git a/cognee/tests/test_pgvector.py b/cognee/tests/test_pgvector.py index b5a6fc446..1466e195f 100644 --- a/cognee/tests/test_pgvector.py +++ b/cognee/tests/test_pgvector.py @@ -65,7 +65,7 @@ async def main(): from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() - random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0] + random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0] random_node_name = random_node.payload["text"] search_results = await cognee.search(SearchType.INSIGHTS, query_text = random_node_name) From e4d00403ba5f0c23b467798a5ce936bed53d11ac Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 15:02:22 +0100 Subject: [PATCH 2/4] fix: Update table names in tests Update table names in tests to accommodate the recent fix Fix COG-677 --- cognee/tests/test_library.py | 2 +- cognee/tests/test_neo4j.py | 2 +- cognee/tests/test_qdrant.py | 2 +- cognee/tests/test_weaviate.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cognee/tests/test_library.py b/cognee/tests/test_library.py index 88c9cdc7b..66d218c3b 100755 --- a/cognee/tests/test_library.py +++ b/cognee/tests/test_library.py @@ -32,7 +32,7 @@ async def main(): from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() - random_node = (await vector_engine.search("Entity_name", "AI"))[0] + random_node = (await vector_engine.search("entity_name", "AI"))[0] random_node_name = random_node.payload["text"] search_results = await cognee.search(SearchType.INSIGHTS, query_text = random_node_name) diff --git a/cognee/tests/test_neo4j.py b/cognee/tests/test_neo4j.py index 44bacd9b5..756b29cc4 100644 --- a/cognee/tests/test_neo4j.py +++ b/cognee/tests/test_neo4j.py @@ -36,7 +36,7 @@ async def main(): from cognee.infrastructure.databases.vector import get_vector_engine vector_engine =
get_vector_engine() - random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0] + random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0] random_node_name = random_node.payload["text"] search_results = await cognee.search(SearchType.INSIGHTS, query_text = random_node_name) diff --git a/cognee/tests/test_qdrant.py b/cognee/tests/test_qdrant.py index 8ca525f0c..680399e60 100644 --- a/cognee/tests/test_qdrant.py +++ b/cognee/tests/test_qdrant.py @@ -37,7 +37,7 @@ async def main(): from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() - random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0] + random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0] random_node_name = random_node.payload["text"] search_results = await cognee.search(SearchType.INSIGHTS, query_text = random_node_name) diff --git a/cognee/tests/test_weaviate.py b/cognee/tests/test_weaviate.py index cdb4b9349..c93dc036a 100644 --- a/cognee/tests/test_weaviate.py +++ b/cognee/tests/test_weaviate.py @@ -35,7 +35,7 @@ async def main(): from cognee.infrastructure.databases.vector import get_vector_engine vector_engine = get_vector_engine() - random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0] + random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0] random_node_name = random_node.payload["text"] search_results = await cognee.search(SearchType.INSIGHTS, query_text = random_node_name) From 70fe6ac54120d05b4947f515b52a10539412be5f Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 15:07:38 +0100 Subject: [PATCH 3/4] fix: Update table name in notebook Update table name to use latest in notebook Fix COG-677 --- notebooks/cognee_demo.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/cognee_demo.ipynb b/notebooks/cognee_demo.ipynb index 45f5a618c..3246a5246 100644 --- 
a/notebooks/cognee_demo.ipynb +++ b/notebooks/cognee_demo.ipynb @@ -758,7 +758,7 @@ "from cognee.infrastructure.databases.vector import get_vector_engine\n", "\n", "vector_engine = get_vector_engine()\n", - "results = await search(vector_engine, \"Entity_name\", \"sarah.nguyen@example.com\")\n", + "results = await search(vector_engine, \"entity_name\", \"sarah.nguyen@example.com\")\n", "for result in results:\n", " print(result)" ] @@ -881,7 +881,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.9.6" } }, "nbformat": 4, From f9353d25faf91407cb823ebf441325bfdc6ebffc Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Wed, 20 Nov 2024 15:14:38 +0100 Subject: [PATCH 4/4] fix: Update table name in notebook Update table name in notebook Fix COG-677 --- notebooks/cognee_demo.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/cognee_demo.ipynb b/notebooks/cognee_demo.ipynb index 3246a5246..67bb4e07f 100644 --- a/notebooks/cognee_demo.ipynb +++ b/notebooks/cognee_demo.ipynb @@ -788,7 +788,7 @@ "source": [ "from cognee.api.v1.search import SearchType\n", "\n", - "node = (await vector_engine.search(\"Entity_name\", \"sarah.nguyen@example.com\"))[0]\n", + "node = (await vector_engine.search(\"entity_name\", \"sarah.nguyen@example.com\"))[0]\n", "node_name = node.payload[\"text\"]\n", "\n", "search_results = await cognee.search(SearchType.SUMMARIES, query_text = node_name)\n",