Merge branch 'dev' into COG-975
This commit is contained in:
commit
3ba98b2ecd
19 changed files with 975 additions and 86 deletions
2
.github/workflows/dockerhub.yml
vendored
2
.github/workflows/dockerhub.yml
vendored
|
|
@ -7,7 +7,7 @@ on:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
docker-build-and-push:
|
docker-build-and-push:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
|
|
|
||||||
2
.github/workflows/py_lint.yml
vendored
2
.github/workflows/py_lint.yml
vendored
|
|
@ -16,7 +16,7 @@ jobs:
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
os:
|
os:
|
||||||
- ubuntu-22.04
|
- ubuntu-latest
|
||||||
python-version: ["3.10.x", "3.11.x"]
|
python-version: ["3.10.x", "3.11.x"]
|
||||||
|
|
||||||
defaults:
|
defaults:
|
||||||
|
|
|
||||||
1
.github/workflows/reusable_notebook.yml
vendored
1
.github/workflows/reusable_notebook.yml
vendored
|
|
@ -51,6 +51,7 @@ jobs:
|
||||||
env:
|
env:
|
||||||
ENV: 'dev'
|
ENV: 'dev'
|
||||||
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
|
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
|
||||||
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
|
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
|
||||||
run: |
|
run: |
|
||||||
|
|
|
||||||
2
.github/workflows/ruff_format.yaml
vendored
2
.github/workflows/ruff_format.yaml
vendored
|
|
@ -3,7 +3,7 @@ on: [ pull_request ]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ruff:
|
ruff:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: astral-sh/ruff-action@v2
|
- uses: astral-sh/ruff-action@v2
|
||||||
|
|
|
||||||
2
.github/workflows/ruff_lint.yaml
vendored
2
.github/workflows/ruff_lint.yaml
vendored
|
|
@ -3,7 +3,7 @@ on: [ pull_request ]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
ruff:
|
ruff:
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: astral-sh/ruff-action@v2
|
- uses: astral-sh/ruff-action@v2
|
||||||
|
|
|
||||||
2
.github/workflows/test_deduplication.yml
vendored
2
.github/workflows/test_deduplication.yml
vendored
|
|
@ -16,7 +16,7 @@ env:
|
||||||
jobs:
|
jobs:
|
||||||
run_deduplication_test:
|
run_deduplication_test:
|
||||||
name: test
|
name: test
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-latest
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
|
||||||
20
.github/workflows/test_llama_index_cognee_integration_notebook.yml
vendored
Normal file
20
.github/workflows/test_llama_index_cognee_integration_notebook.yml
vendored
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
name: test | llama index cognee integration notebook
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
pull_request:
|
||||||
|
types: [labeled, synchronize]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_notebook_test:
|
||||||
|
uses: ./.github/workflows/reusable_notebook.yml
|
||||||
|
with:
|
||||||
|
notebook-location: notebooks/llama_index_cognee_integration.ipynb
|
||||||
|
secrets:
|
||||||
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
|
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
|
||||||
|
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
|
||||||
2
.github/workflows/test_qdrant.yml
vendored
2
.github/workflows/test_qdrant.yml
vendored
|
|
@ -17,7 +17,7 @@ jobs:
|
||||||
|
|
||||||
run_qdrant_integration_test:
|
run_qdrant_integration_test:
|
||||||
name: test
|
name: test
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-latest
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
|
||||||
2
.github/workflows/test_weaviate.yml
vendored
2
.github/workflows/test_weaviate.yml
vendored
|
|
@ -17,7 +17,7 @@ jobs:
|
||||||
|
|
||||||
run_weaviate_integration_test:
|
run_weaviate_integration_test:
|
||||||
name: test
|
name: test
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-latest
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|
|
||||||
12
README.md
12
README.md
|
|
@ -101,15 +101,9 @@ cognee.config.set_graphistry_config({
|
||||||
})
|
})
|
||||||
```
|
```
|
||||||
|
|
||||||
(Optional) To run the UI, go to cognee-frontend directory and run:
|
(Optional) To run cognee with a UI, go to the cognee-mcp directory and follow the instructions.
|
||||||
```
|
You will be able to use cognee as an MCP tool to create graphs and query them.
|
||||||
npm run dev
|
|
||||||
```
|
|
||||||
or run everything in a docker container:
|
|
||||||
```
|
|
||||||
docker-compose up
|
|
||||||
```
|
|
||||||
Then navigate to localhost:3000
|
|
||||||
|
|
||||||
If you want to use Cognee with PostgreSQL, make sure to set the following values in the .env file:
|
If you want to use Cognee with PostgreSQL, make sure to set the following values in the .env file:
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,8 @@ name = "cognee-mcp"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
description = "A MCP server project"
|
description = "A MCP server project"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.11.0"
|
requires-python = ">=3.10"
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"mcp>=1.1.1",
|
"mcp>=1.1.1",
|
||||||
"openai==1.59.4",
|
"openai==1.59.4",
|
||||||
|
|
|
||||||
641
cognee-mcp/uv.lock
generated
641
cognee-mcp/uv.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -8,7 +8,7 @@ from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInte
|
||||||
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
|
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
|
||||||
from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
|
from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
|
||||||
import heapq
|
import heapq
|
||||||
from graphistry import edges
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
class CogneeGraph(CogneeAbstractGraph):
|
class CogneeGraph(CogneeAbstractGraph):
|
||||||
|
|
@ -127,51 +127,25 @@ class CogneeGraph(CogneeAbstractGraph):
|
||||||
else:
|
else:
|
||||||
print(f"Node with id {node_id} not found in the graph.")
|
print(f"Node with id {node_id} not found in the graph.")
|
||||||
|
|
||||||
async def map_vector_distances_to_graph_edges(
|
async def map_vector_distances_to_graph_edges(self, vector_engine, query) -> None:
|
||||||
self, vector_engine, query
|
|
||||||
) -> None: # :TODO: When we calculate edge embeddings in vector db change this similarly to node mapping
|
|
||||||
try:
|
try:
|
||||||
# Step 1: Generate the query embedding
|
|
||||||
query_vector = await vector_engine.embed_data([query])
|
query_vector = await vector_engine.embed_data([query])
|
||||||
query_vector = query_vector[0]
|
query_vector = query_vector[0]
|
||||||
if query_vector is None or len(query_vector) == 0:
|
if query_vector is None or len(query_vector) == 0:
|
||||||
raise ValueError("Failed to generate query embedding.")
|
raise ValueError("Failed to generate query embedding.")
|
||||||
|
|
||||||
# Step 2: Collect all unique relationship types
|
edge_distances = await vector_engine.get_distance_from_collection_elements(
|
||||||
unique_relationship_types = set()
|
"edge_type_relationship_name", query_text=query
|
||||||
for edge in self.edges:
|
)
|
||||||
relationship_type = edge.attributes.get("relationship_type")
|
|
||||||
if relationship_type:
|
|
||||||
unique_relationship_types.add(relationship_type)
|
|
||||||
|
|
||||||
# Step 3: Embed all unique relationship types
|
embedding_map = {result.payload["text"]: result.score for result in edge_distances}
|
||||||
unique_relationship_types = list(unique_relationship_types)
|
|
||||||
relationship_type_embeddings = await vector_engine.embed_data(unique_relationship_types)
|
|
||||||
|
|
||||||
# Step 4: Map relationship types to their embeddings and calculate distances
|
|
||||||
embedding_map = {}
|
|
||||||
for relationship_type, embedding in zip(
|
|
||||||
unique_relationship_types, relationship_type_embeddings
|
|
||||||
):
|
|
||||||
edge_vector = np.array(embedding)
|
|
||||||
|
|
||||||
# Calculate cosine similarity
|
|
||||||
similarity = np.dot(query_vector, edge_vector) / (
|
|
||||||
np.linalg.norm(query_vector) * np.linalg.norm(edge_vector)
|
|
||||||
)
|
|
||||||
distance = 1 - similarity
|
|
||||||
|
|
||||||
# Round the distance to 4 decimal places and store it
|
|
||||||
embedding_map[relationship_type] = round(distance, 4)
|
|
||||||
|
|
||||||
# Step 4: Assign precomputed distances to edges
|
|
||||||
for edge in self.edges:
|
for edge in self.edges:
|
||||||
relationship_type = edge.attributes.get("relationship_type")
|
relationship_type = edge.attributes.get("relationship_type")
|
||||||
if not relationship_type or relationship_type not in embedding_map:
|
if not relationship_type or relationship_type not in embedding_map:
|
||||||
print(f"Edge {edge} has an unknown or missing relationship type.")
|
print(f"Edge {edge} has an unknown or missing relationship type.")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Assign the precomputed distance
|
|
||||||
edge.attributes["vector_distance"] = embedding_map[relationship_type]
|
edge.attributes["vector_distance"] = embedding_map[relationship_type]
|
||||||
|
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
|
|
|
||||||
|
|
@ -62,24 +62,6 @@ async def brute_force_triplet_search(
|
||||||
return retrieved_results
|
return retrieved_results
|
||||||
|
|
||||||
|
|
||||||
def delete_duplicated_vector_db_elements(
|
|
||||||
collections, results
|
|
||||||
): #:TODO: This is just for now to fix vector db duplicates
|
|
||||||
results_dict = {}
|
|
||||||
for collection, results in zip(collections, results):
|
|
||||||
seen_ids = set()
|
|
||||||
unique_results = []
|
|
||||||
for result in results:
|
|
||||||
if result.id not in seen_ids:
|
|
||||||
unique_results.append(result)
|
|
||||||
seen_ids.add(result.id)
|
|
||||||
else:
|
|
||||||
print(f"Duplicate found in collection '{collection}': {result.id}")
|
|
||||||
results_dict[collection] = unique_results
|
|
||||||
|
|
||||||
return results_dict
|
|
||||||
|
|
||||||
|
|
||||||
async def brute_force_search(
|
async def brute_force_search(
|
||||||
query: str, user: User, top_k: int, collections: List[str] = None
|
query: str, user: User, top_k: int, collections: List[str] = None
|
||||||
) -> list:
|
) -> list:
|
||||||
|
|
@ -125,10 +107,7 @@ async def brute_force_search(
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
############################################# :TODO: Change when vector db does not contain duplicates
|
node_distances = {collection: result for collection, result in zip(collections, results)}
|
||||||
node_distances = delete_duplicated_vector_db_elements(collections, results)
|
|
||||||
# node_distances = {collection: result for collection, result in zip(collections, results)}
|
|
||||||
##############################################
|
|
||||||
|
|
||||||
memory_fragment = CogneeGraph()
|
memory_fragment = CogneeGraph()
|
||||||
|
|
||||||
|
|
@ -140,14 +119,12 @@ async def brute_force_search(
|
||||||
|
|
||||||
await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
|
await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
|
||||||
|
|
||||||
#:TODO: Change when vectordb contains edge embeddings
|
|
||||||
await memory_fragment.map_vector_distances_to_graph_edges(vector_engine, query)
|
await memory_fragment.map_vector_distances_to_graph_edges(vector_engine, query)
|
||||||
|
|
||||||
results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
|
results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
|
||||||
|
|
||||||
send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
|
send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
|
||||||
|
|
||||||
#:TODO: Once we have Edge pydantic models we should retrieve the exact edge and node objects from graph db
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from sqlalchemy.orm import joinedload
|
from sqlalchemy.orm import selectinload
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
from cognee.modules.users.models import User
|
from cognee.modules.users.models import User
|
||||||
from cognee.infrastructure.databases.relational import get_relational_engine
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
||||||
|
|
@ -11,7 +11,7 @@ async def get_default_user():
|
||||||
async with db_engine.get_async_session() as session:
|
async with db_engine.get_async_session() as session:
|
||||||
query = (
|
query = (
|
||||||
select(User)
|
select(User)
|
||||||
.options(joinedload(User.groups))
|
.options(selectinload(User.groups))
|
||||||
.where(User.email == "default_user@example.com")
|
.where(User.email == "default_user@example.com")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -451,16 +451,20 @@ def graph_to_tuple(graph):
|
||||||
|
|
||||||
|
|
||||||
def setup_logging(log_level=logging.INFO):
|
def setup_logging(log_level=logging.INFO):
|
||||||
"""This method sets up the logging configuration."""
|
"""Sets up the logging configuration."""
|
||||||
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s\n")
|
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s\n")
|
||||||
|
|
||||||
stream_handler = logging.StreamHandler(sys.stdout)
|
stream_handler = logging.StreamHandler(sys.stdout)
|
||||||
stream_handler.setFormatter(formatter)
|
stream_handler.setFormatter(formatter)
|
||||||
stream_handler.setLevel(log_level)
|
stream_handler.setLevel(log_level)
|
||||||
|
|
||||||
logging.basicConfig(
|
root_logger = logging.getLogger()
|
||||||
level=log_level,
|
|
||||||
handlers=[stream_handler],
|
if root_logger.hasHandlers():
|
||||||
)
|
root_logger.handlers.clear()
|
||||||
|
|
||||||
|
root_logger.addHandler(stream_handler)
|
||||||
|
root_logger.setLevel(log_level)
|
||||||
|
|
||||||
|
|
||||||
# ---------------- Example Usage ----------------
|
# ---------------- Example Usage ----------------
|
||||||
|
|
|
||||||
|
|
@ -192,7 +192,7 @@ async def main(enable_steps):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logging(logging.INFO)
|
setup_logging(logging.ERROR)
|
||||||
|
|
||||||
rebuild_kg = True
|
rebuild_kg = True
|
||||||
retrieve = True
|
retrieve = True
|
||||||
|
|
|
||||||
285
notebooks/llama_index_cognee_integration.ipynb
Normal file
285
notebooks/llama_index_cognee_integration.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "cognee"
|
name = "cognee"
|
||||||
version = "0.1.21"
|
version = "0.1.22"
|
||||||
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
||||||
authors = ["Vasilije Markovic", "Boris Arzentar"]
|
authors = ["Vasilije Markovic", "Boris Arzentar"]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue