Merge branch 'dev' into COG-975

2025-01-14 10:04:21 +01:00 · 2025-01-14 10:04:21 +01:00 · 3ba98b2ecd
commit 3ba98b2ecd
parent 047948ae54 12031e6c43
19 changed files with 975 additions and 86 deletions
--- a/.github/workflows/dockerhub.yml
+++ b/.github/workflows/dockerhub.yml
@ -7,7 +7,7 @@ on:
 jobs:
  docker-build-and-push:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
    - name: Checkout repository
--- a/.github/workflows/py_lint.yml
+++ b/.github/workflows/py_lint.yml
@ -16,7 +16,7 @@ jobs:
      fail-fast: true
      matrix:
        os:
-          - ubuntu-22.04
+          - ubuntu-latest
        python-version: ["3.10.x", "3.11.x"]
    defaults:
--- a/.github/workflows/reusable_notebook.yml
+++ b/.github/workflows/reusable_notebook.yml
@ -51,6 +51,7 @@ jobs:
        env:
          ENV: 'dev'
          LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
          GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
        run: |
--- a/.github/workflows/ruff_format.yaml
+++ b/.github/workflows/ruff_format.yaml
@ -3,7 +3,7 @@ on: [ pull_request ]
 jobs:
  ruff:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/ruff-action@v2
--- a/.github/workflows/ruff_lint.yaml
+++ b/.github/workflows/ruff_lint.yaml
@ -3,7 +3,7 @@ on: [ pull_request ]
 jobs:
  ruff:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/ruff-action@v2
--- a/.github/workflows/test_deduplication.yml
+++ b/.github/workflows/test_deduplication.yml
@ -16,7 +16,7 @@ env:
 jobs:
  run_deduplication_test:
    name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash
--- a/.github/workflows/test_llama_index_cognee_integration_notebook.yml
+++ b/.github/workflows/test_llama_index_cognee_integration_notebook.yml
@ -0,0 +1,20 @@
 name: test | llama index cognee integration notebook
 on:
  workflow_dispatch:
  pull_request:
    types: [labeled, synchronize]
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
 jobs:
  run_notebook_test:
      uses: ./.github/workflows/reusable_notebook.yml
      with:
        notebook-location: notebooks/llama_index_cognee_integration.ipynb
      secrets:
        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
        GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
--- a/.github/workflows/test_qdrant.yml
+++ b/.github/workflows/test_qdrant.yml
@ -17,7 +17,7 @@ jobs:
  run_qdrant_integration_test:
    name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash
--- a/.github/workflows/test_weaviate.yml
+++ b/.github/workflows/test_weaviate.yml
@ -17,7 +17,7 @@ jobs:
  run_weaviate_integration_test:
    name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash
--- a/README.md
+++ b/README.md
@ -101,15 +101,9 @@ cognee.config.set_graphistry_config({
 })
 ```
-(Optional) To run the UI, go to cognee-frontend directory and run:
+(Optional) To run cognee with a UI, go to the cognee-mcp directory and follow the instructions there.
-```
+You will be able to use cognee as an MCP tool to create graphs and query them.
-npm run dev
+
 ```
 or run everything in a docker container:
 ```
 docker-compose up
 ```
 Then navigate to localhost:3000
 If you want to use Cognee with PostgreSQL, make sure to set the following values in the .env file:
 ```
--- a/cognee-mcp/pyproject.toml
+++ b/cognee-mcp/pyproject.toml
@ -3,7 +3,8 @@ name = "cognee-mcp"
 version = "0.1.0"
 description = "A MCP server project"
 readme = "README.md"
-requires-python = ">=3.11.0"
+requires-python = ">=3.10"
 dependencies = [
    "mcp>=1.1.1",
    "openai==1.59.4",
--- a/cognee-mcp/uv.lock
+++ b/cognee-mcp/uv.lock
--- a/cognee/modules/graph/cognee_graph/CogneeGraph.py
+++ b/cognee/modules/graph/cognee_graph/CogneeGraph.py
@ -8,7 +8,7 @@ from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInte
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
 from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
 import heapq
-from graphistry import edges
+import asyncio
 class CogneeGraph(CogneeAbstractGraph):
@ -127,51 +127,25 @@ class CogneeGraph(CogneeAbstractGraph):
                else:
                    print(f"Node with id {node_id} not found in the graph.")
-    async def map_vector_distances_to_graph_edges(
+    async def map_vector_distances_to_graph_edges(self, vector_engine, query) -> None:
        self, vector_engine, query
    ) -> None:  # :TODO: When we calculate edge embeddings in vector db change this similarly to node mapping
        try:
            # Step 1: Generate the query embedding
            query_vector = await vector_engine.embed_data([query])
            query_vector = query_vector[0]
            if query_vector is None or len(query_vector) == 0:
                raise ValueError("Failed to generate query embedding.")
-            # Step 2: Collect all unique relationship types
+            edge_distances = await vector_engine.get_distance_from_collection_elements(
-            unique_relationship_types = set()
+                "edge_type_relationship_name", query_text=query
-            for edge in self.edges:
+            )
                relationship_type = edge.attributes.get("relationship_type")
                if relationship_type:
                    unique_relationship_types.add(relationship_type)
-            # Step 3: Embed all unique relationship types
+            embedding_map = {result.payload["text"]: result.score for result in edge_distances}
            unique_relationship_types = list(unique_relationship_types)
            relationship_type_embeddings = await vector_engine.embed_data(unique_relationship_types)
            # Step 4: Map relationship types to their embeddings and calculate distances
            embedding_map = {}
            for relationship_type, embedding in zip(
                unique_relationship_types, relationship_type_embeddings
            ):
                edge_vector = np.array(embedding)
                # Calculate cosine similarity
                similarity = np.dot(query_vector, edge_vector) / (
                    np.linalg.norm(query_vector) * np.linalg.norm(edge_vector)
                )
                distance = 1 - similarity
                # Round the distance to 4 decimal places and store it
                embedding_map[relationship_type] = round(distance, 4)
            # Step 4: Assign precomputed distances to edges
            for edge in self.edges:
                relationship_type = edge.attributes.get("relationship_type")
                if not relationship_type or relationship_type not in embedding_map:
                    print(f"Edge {edge} has an unknown or missing relationship type.")
                    continue
                # Assign the precomputed distance
                edge.attributes["vector_distance"] = embedding_map[relationship_type]
        except Exception as ex:
--- a/cognee/modules/retrieval/brute_force_triplet_search.py
+++ b/cognee/modules/retrieval/brute_force_triplet_search.py
@ -62,24 +62,6 @@ async def brute_force_triplet_search(
    return retrieved_results
 def delete_duplicated_vector_db_elements(
    collections, results
 ):  #:TODO: This is just for now to fix vector db duplicates
    results_dict = {}
    for collection, results in zip(collections, results):
        seen_ids = set()
        unique_results = []
        for result in results:
            if result.id not in seen_ids:
                unique_results.append(result)
                seen_ids.add(result.id)
            else:
                print(f"Duplicate found in collection '{collection}': {result.id}")
        results_dict[collection] = unique_results
    return results_dict
 async def brute_force_search(
    query: str, user: User, top_k: int, collections: List[str] = None
 ) -> list:
@ -125,10 +107,7 @@ async def brute_force_search(
            ]
        )
-        ############################################# :TODO: Change when vector db does not contain duplicates
+        node_distances = {collection: result for collection, result in zip(collections, results)}
        node_distances = delete_duplicated_vector_db_elements(collections, results)
        # node_distances = {collection: result for collection, result in zip(collections, results)}
        ##############################################
        memory_fragment = CogneeGraph()
@ -140,14 +119,12 @@ async def brute_force_search(
        await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
        #:TODO: Change when vectordb contains edge embeddings
        await memory_fragment.map_vector_distances_to_graph_edges(vector_engine, query)
        results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
        send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
        #:TODO: Once we have Edge pydantic models we should retrieve the exact edge and node objects from graph db
        return results
    except Exception as e:
--- a/cognee/modules/users/methods/get_default_user.py
+++ b/cognee/modules/users/methods/get_default_user.py
@ -1,4 +1,4 @@
-from sqlalchemy.orm import joinedload
+from sqlalchemy.orm import selectinload
 from sqlalchemy.future import select
 from cognee.modules.users.models import User
 from cognee.infrastructure.databases.relational import get_relational_engine
@ -11,7 +11,7 @@ async def get_default_user():
    async with db_engine.get_async_session() as session:
        query = (
            select(User)
-            .options(joinedload(User.groups))
+            .options(selectinload(User.groups))
            .where(User.email == "default_user@example.com")
        )
--- a/cognee/shared/utils.py
+++ b/cognee/shared/utils.py
@ -451,16 +451,20 @@ def graph_to_tuple(graph):
 def setup_logging(log_level=logging.INFO):
-    """This method sets up the logging configuration."""
+    """Sets up the logging configuration."""
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s\n")
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(formatter)
    stream_handler.setLevel(log_level)
-    logging.basicConfig(
+    root_logger = logging.getLogger()
-        level=log_level,
+
-        handlers=[stream_handler],
+    if root_logger.hasHandlers():
-    )
+        root_logger.handlers.clear()
    root_logger.addHandler(stream_handler)
    root_logger.setLevel(log_level)
 # ---------------- Example Usage ----------------
--- a/examples/python/dynamic_steps_example.py
+++ b/examples/python/dynamic_steps_example.py
@ -192,7 +192,7 @@ async def main(enable_steps):
 if __name__ == "__main__":
-    setup_logging(logging.INFO)
+    setup_logging(logging.ERROR)
    rebuild_kg = True
    retrieve = True
--- a/notebooks/llama_index_cognee_integration.ipynb
+++ b/notebooks/llama_index_cognee_integration.ipynb
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cognee"
-version = "0.1.21"
+version = "0.1.22"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = ["Vasilije Markovic", "Boris Arzentar"]
 readme = "README.md"