Merge branch 'dev' into COG-975

Commit 3ba98b2ecd by Vasilije, 2025-01-14 10:04:21 +01:00, committed by GitHub
No known key found for this signature in database. GPG key ID: B5690EEEBB952194
19 changed files with 975 additions and 86 deletions

View file

@@ -7,7 +7,7 @@ on:
 jobs:
   docker-build-and-push:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout repository

View file

@@ -16,7 +16,7 @@ jobs:
       fail-fast: true
       matrix:
         os:
-          - ubuntu-22.04
+          - ubuntu-latest
         python-version: ["3.10.x", "3.11.x"]
     defaults:

View file

@@ -51,6 +51,7 @@ jobs:
       env:
         ENV: 'dev'
         LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
         GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
         GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
       run: |
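Note: the added line exports the same secret under a second name, since some code paths read LLM_API_KEY while others (the OpenAI SDK itself, for instance) look up OPENAI_API_KEY directly. A minimal sketch of how such a lookup typically resolves; this is illustrative, not cognee's actual code:

```
import os

# Either variable resolves to the same secret once both are exported;
# which one a given code path reads depends on the library involved.
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("LLM_API_KEY")
if api_key is None:
    raise RuntimeError("No API key found in the environment")
```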

View file

@@ -3,7 +3,7 @@ on: [ pull_request ]
 jobs:
   ruff:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
       - uses: astral-sh/ruff-action@v2

View file

@@ -3,7 +3,7 @@ on: [ pull_request ]
 jobs:
   ruff:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
       - uses: astral-sh/ruff-action@v2

View file

@@ -16,7 +16,7 @@ env:
 jobs:
   run_deduplication_test:
     name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash

View file

@@ -0,0 +1,20 @@
+name: test | llama index cognee integration notebook
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [labeled, synchronize]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  run_notebook_test:
+    uses: ./.github/workflows/reusable_notebook.yml
+    with:
+      notebook-location: notebooks/llama_index_cognee_integration.ipynb
+    secrets:
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
+      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}

View file

@@ -17,7 +17,7 @@ jobs:
   run_qdrant_integration_test:
     name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash

View file

@@ -17,7 +17,7 @@ jobs:
   run_weaviate_integration_test:
     name: test
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash

View file

@@ -101,15 +101,9 @@ cognee.config.set_graphistry_config({
 })
 ```
-(Optional) To run the UI, go to the cognee-frontend directory and run:
-```
-npm run dev
-```
-or run everything in a docker container:
-```
-docker-compose up
-```
-Then navigate to localhost:3000
+(Optional) To run cognee with a UI, go to the cognee-mcp directory and follow the instructions.
+You will be able to use cognee as an MCP tool to create and query graphs.
 If you want to use Cognee with PostgreSQL, make sure to set the following values in the .env file:
 ```

View file

@@ -3,7 +3,8 @@ name = "cognee-mcp"
 version = "0.1.0"
 description = "A MCP server project"
 readme = "README.md"
-requires-python = ">=3.11.0"
+requires-python = ">=3.10"
 dependencies = [
     "mcp>=1.1.1",
+    "openai==1.59.4",

cognee-mcp/uv.lock (generated, 641 lines changed)

File diff suppressed because it is too large

View file

@@ -8,7 +8,7 @@ from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
 from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
 import heapq
-from graphistry import edges
+import asyncio


 class CogneeGraph(CogneeAbstractGraph):
@@ -127,51 +127,25 @@ class CogneeGraph(CogneeAbstractGraph):
             else:
                 print(f"Node with id {node_id} not found in the graph.")

-    async def map_vector_distances_to_graph_edges(
-        self, vector_engine, query
-    ) -> None:  # :TODO: When we calculate edge embeddings in vector db change this similarly to node mapping
+    async def map_vector_distances_to_graph_edges(self, vector_engine, query) -> None:
         try:
-            # Step 1: Generate the query embedding
-            query_vector = await vector_engine.embed_data([query])
-            query_vector = query_vector[0]
-            if query_vector is None or len(query_vector) == 0:
-                raise ValueError("Failed to generate query embedding.")
-
-            # Step 2: Collect all unique relationship types
-            unique_relationship_types = set()
-            for edge in self.edges:
-                relationship_type = edge.attributes.get("relationship_type")
-                if relationship_type:
-                    unique_relationship_types.add(relationship_type)
+            edge_distances = await vector_engine.get_distance_from_collection_elements(
+                "edge_type_relationship_name", query_text=query
+            )

-            # Step 3: Embed all unique relationship types
-            unique_relationship_types = list(unique_relationship_types)
-            relationship_type_embeddings = await vector_engine.embed_data(unique_relationship_types)
+            embedding_map = {result.payload["text"]: result.score for result in edge_distances}

-            # Step 4: Map relationship types to their embeddings and calculate distances
-            embedding_map = {}
-            for relationship_type, embedding in zip(
-                unique_relationship_types, relationship_type_embeddings
-            ):
-                edge_vector = np.array(embedding)
-                # Calculate cosine similarity
-                similarity = np.dot(query_vector, edge_vector) / (
-                    np.linalg.norm(query_vector) * np.linalg.norm(edge_vector)
-                )
-                distance = 1 - similarity
-                # Round the distance to 4 decimal places and store it
-                embedding_map[relationship_type] = round(distance, 4)

-            # Step 4: Assign precomputed distances to edges
             for edge in self.edges:
                 relationship_type = edge.attributes.get("relationship_type")
                 if not relationship_type or relationship_type not in embedding_map:
                     print(f"Edge {edge} has an unknown or missing relationship type.")
                     continue
-                # Assign the precomputed distance
                 edge.attributes["vector_distance"] = embedding_map[relationship_type]

         except Exception as ex:
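For comparison: the removed branch embedded each distinct relationship type and computed cosine distances by hand, while the new code asks the vector engine to score the edge_type_relationship_name collection in a single call. A minimal standalone sketch of the cosine-distance step the old code performed (function name and inputs are illustrative):

```
import numpy as np

def cosine_distance(query_vector, edge_vector) -> float:
    # 1 - cosine similarity, rounded to 4 decimals as the removed code did
    q = np.asarray(query_vector, dtype=float)
    e = np.asarray(edge_vector, dtype=float)
    similarity = np.dot(q, e) / (np.linalg.norm(q) * np.linalg.norm(e))
    return round(1 - similarity, 4)

print(cosine_distance([1.0, 0.0], [1.0, 1.0]))  # 0.2929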

View file

@@ -62,24 +62,6 @@ async def brute_force_triplet_search(
     return retrieved_results


-def delete_duplicated_vector_db_elements(
-    collections, results
-):  # :TODO: This is just for now to fix vector db duplicates
-    results_dict = {}
-    for collection, results in zip(collections, results):
-        seen_ids = set()
-        unique_results = []
-        for result in results:
-            if result.id not in seen_ids:
-                unique_results.append(result)
-                seen_ids.add(result.id)
-            else:
-                print(f"Duplicate found in collection '{collection}': {result.id}")
-        results_dict[collection] = unique_results
-    return results_dict


 async def brute_force_search(
     query: str, user: User, top_k: int, collections: List[str] = None
 ) -> list:
@@ -125,10 +107,7 @@ async def brute_force_search(
         ]
     )

-    ############################################# :TODO: Change when vector db does not contain duplicates
-    node_distances = delete_duplicated_vector_db_elements(collections, results)
-    # node_distances = {collection: result for collection, result in zip(collections, results)}
-    ##############################################
+    node_distances = {collection: result for collection, result in zip(collections, results)}

     memory_fragment = CogneeGraph()
@@ -140,14 +119,12 @@ async def brute_force_search(
         await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
-        #:TODO: Change when vectordb contains edge embeddings
         await memory_fragment.map_vector_distances_to_graph_edges(vector_engine, query)

         results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
         send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
-        #:TODO: Once we have Edge pydantic models we should retrieve the exact edge and node objects from graph db
         return results
     except Exception as e:
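The heapq import kept in CogneeGraph above hints at how calculate_top_triplet_importances likely ranks results. A sketch under that assumption, with hypothetical field names (not cognee's actual schema), ranking triplets by the sum of their node and edge distances:

```
import heapq

def top_k_triplets(triplets, k):
    # Smaller combined vector distance = more relevant triplet.
    # The dictionary keys here are hypothetical, for illustration only.
    def combined_distance(triplet):
        return (
            triplet["node1_distance"]
            + triplet["edge_distance"]
            + triplet["node2_distance"]
        )
    return heapq.nsmallest(k, triplets, key=combined_distance)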

View file

@ -1,4 +1,4 @@
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.future import select
from cognee.modules.users.models import User
from cognee.infrastructure.databases.relational import get_relational_engine
@ -11,7 +11,7 @@ async def get_default_user():
async with db_engine.get_async_session() as session:
query = (
select(User)
.options(joinedload(User.groups))
.options(selectinload(User.groups))
.where(User.email == "default_user@example.com")
)
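The switch from joinedload to selectinload matters here: joinedload on a collection LEFT JOINs groups onto users and duplicates the parent row, which in SQLAlchemy 2.0 requires calling .unique() on the result; selectinload instead issues a second SELECT ... WHERE ... IN (...) query with no row duplication. A minimal sketch of the resulting query pattern, assuming an AsyncSession:

```
from sqlalchemy import select
from sqlalchemy.orm import selectinload

from cognee.modules.users.models import User

async def load_default_user(session):
    # selectinload loads User rows first, then fetches the related groups
    # with one extra IN query, so no JOIN-induced duplication occurs.
    query = (
        select(User)
        .options(selectinload(User.groups))
        .where(User.email == "default_user@example.com")
    )
    result = await session.execute(query)
    return result.scalars().first()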

View file

@@ -451,16 +451,20 @@ def graph_to_tuple(graph):
 def setup_logging(log_level=logging.INFO):
-    """This method sets up the logging configuration."""
+    """Sets up the logging configuration."""
     formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s\n")

     stream_handler = logging.StreamHandler(sys.stdout)
     stream_handler.setFormatter(formatter)
     stream_handler.setLevel(log_level)

-    logging.basicConfig(
-        level=log_level,
-        handlers=[stream_handler],
-    )
+    root_logger = logging.getLogger()
+    if root_logger.hasHandlers():
+        root_logger.handlers.clear()
+
+    root_logger.addHandler(stream_handler)
+    root_logger.setLevel(log_level)


 # ---------------- Example Usage ----------------
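This rewrite fixes a subtle trap: logging.basicConfig is a no-op once the root logger already has handlers, so repeated setup_logging calls (common in notebooks and test runs) silently kept the first configuration. Clearing the old handlers makes every call take effect. A short usage sketch, assuming the setup_logging defined above:

```
import logging

setup_logging(logging.INFO)
logging.info("visible: root level is INFO")

setup_logging(logging.ERROR)  # now actually reconfigures the root logger
logging.info("suppressed: root level is ERROR")
logging.error("still visible")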

View file

@@ -192,7 +192,7 @@ async def main(enable_steps):
 if __name__ == "__main__":
-    setup_logging(logging.INFO)
+    setup_logging(logging.ERROR)

     rebuild_kg = True
     retrieve = True

File diff suppressed because one or more lines are too long

View file

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cognee"
-version = "0.1.21"
+version = "0.1.22"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = ["Vasilije Markovic", "Boris Arzentar"]
 readme = "README.md"