Merge remote-tracking branch 'origin/dev' into feat/modal-parallelization

Boris Arzentar 2025-07-08 22:13:22 +02:00
commit 340a61b20a
10 changed files with 261 additions and 34 deletions


@@ -248,3 +248,32 @@ jobs:
          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
        run: poetry run python ./cognee/tests/test_parallel_databases.py
+
+  test-permissions:
+    name: Test permissions with different situations in Cognee
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+      - name: Install specific graph db dependency
+        run: |
+          poetry install
+      - name: Run permissions test
+        env:
+          ENV: 'dev'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+        run: poetry run python ./cognee/tests/test_permissions.py
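The new test-permissions job mirrors the parallel-databases job above: same checkout and setup action, same secret-backed LLM and embedding configuration, different test script. As a rough local equivalent, one could export the same variables and invoke the test directly; the sketch below shows only two of the secret-backed variables, with placeholder values standing in for real credentials.

import os
import subprocess

# Placeholder values stand in for the GitHub secrets the CI job reads;
# substitute real credentials (and the remaining LLM_*/EMBEDDING_* vars)
# before running.
env = {
    **os.environ,
    "ENV": "dev",
    "LLM_API_KEY": "<your-llm-key>",
    "EMBEDDING_API_KEY": "<your-embedding-key>",
}

# Run the permissions test the same way the workflow step does.
subprocess.run(
    ["poetry", "run", "python", "./cognee/tests/test_permissions.py"],
    env=env,
    check=True,
)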


@@ -15,6 +15,10 @@
  <a href="https://discord.gg/NQPKmU5CCg">Join Discord</a>
  ·
  <a href="https://www.reddit.com/r/AIMemory/">Join r/AIMemory</a>
+  ·
+  <a href="https://www.docs.cognee.ai">Docs</a>
+  ·
+  <a href="https://github.com/topoteretes/cognee-community">cognee community repo</a>
</p>
@@ -27,9 +31,16 @@
[![Contributors](https://img.shields.io/github/contributors/topoteretes/cognee?colorA=00C586&colorB=000000)](https://github.com/topoteretes/cognee/graphs/contributors)
<a href="https://github.com/sponsors/topoteretes"><img src="https://img.shields.io/badge/Sponsor-❤️-ff69b4.svg" alt="Sponsor"></a>
-<a href="https://www.producthunt.com/posts/cognee?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-cognee" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=946346&theme=light&period=daily&t=1744472480704" alt="cognee - Memory&#0032;for&#0032;AI&#0032;Agents&#0032;&#0032;in&#0032;5&#0032;lines&#0032;of&#0032;code | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
-<a href="https://trendshift.io/repositories/13955" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13955" alt="topoteretes%2Fcognee | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
+<p>
+  <a href="https://www.producthunt.com/posts/cognee?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-cognee" target="_blank" style="display:inline-block; margin-right:10px;">
+    <img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=946346&theme=light&period=daily&t=1744472480704" alt="cognee - Memory&#0032;for&#0032;AI&#0032;Agents&#0032;&#0032;in&#0032;5&#0032;lines&#0032;of&#0032;code | Product Hunt" width="250" height="54" />
+  </a>
+  <a href="https://trendshift.io/repositories/13955" target="_blank" style="display:inline-block;">
+    <img src="https://trendshift.io/api/badge/repositories/13955" alt="topoteretes%2Fcognee | Trendshift" width="250" height="55" />
+  </a>
+</p>
+
+**🚀 We are launching Cognee SaaS: Sign up [here](https://www.cognee.ai/waitlist) for the hosted beta!**


@@ -1,6 +1,7 @@
import asyncio
from pydantic import BaseModel
from typing import Union, Optional
+from uuid import UUID
from cognee.shared.logging_utils import get_logger
from cognee.shared.data_models import KnowledgeGraph
@@ -29,7 +30,7 @@ update_status_lock = asyncio.Lock()
async def cognify(
-    datasets: Union[str, list[str]] = None,
+    datasets: Union[str, list[str], list[UUID]] = None,
    user: User = None,
    graph_model: BaseModel = KnowledgeGraph,
    chunker=TextChunker,

@@ -55,7 +55,7 @@ async def delete(
    # Handle different input types
    if isinstance(data, str):
-        if data.startswith("file://"):  # It's a file path
+        if data.startswith("file://") or data.startswith("/"):  # It's a file path
            with open(data.replace("file://", ""), mode="rb") as file:
                classified_data = classify(file)
                content_hash = classified_data.get_metadata()["content_hash"]
@@ -77,7 +77,7 @@ async def delete(
        # Handle list of inputs sequentially
        results = []
        for item in data:
-            result = await delete(item, dataset_name, dataset[0].id, mode)
+            result = await delete(item, dataset_name, dataset[0].id, mode, user=user)
            results.append(result)
        return {"status": "success", "message": "Multiple documents deleted", "results": results}
    else:  # It's already a BinaryIO
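Two fixes here: a plain absolute path now takes the same file-path branch as a file:// URI, and recursive deletes over a list now forward the calling user instead of silently dropping it. A hedged sketch of the first change, assuming delete keeps the (data, dataset_name, ...) argument order shown above:

import asyncio

import cognee

async def run():
    # Both spellings should now resolve to the same file-path branch.
    await cognee.delete("file:///tmp/doc.txt", dataset_name="NLP")
    await cognee.delete("/tmp/doc.txt", dataset_name="NLP")

asyncio.run(run())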


@@ -155,7 +155,7 @@ class MemgraphAdapter(GraphDBInterface):
        MERGE (node {id: $node_id})
        ON CREATE SET node:$node_label, node += $properties, node.updated_at = timestamp()
        ON MATCH SET node:$node_label, node += $properties, node.updated_at = timestamp()
-        RETURN ID(node) AS internal_id,node.id AS nodeId
+        RETURN ID(node) AS internal_id, node.id AS nodeId
        """
        params = {
@@ -845,8 +845,8 @@ class MemgraphAdapter(GraphDBInterface):
        result = await self.query(query)
        edges = [
            (
-                record["properties"]["source_node_id"],
-                record["properties"]["target_node_id"],
+                record["source"],
+                record["target"],
                record["type"],
                record["properties"],
            )
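The second hunk changes where the edge endpoints are read from: the query's dedicated source and target fields, rather than keys inside the edge's properties. The tuples then unpack as in this sketch, using a hypothetical record mirroring that shape:

# Hypothetical record mirroring the query output assumed by the diff above.
record = {
    "source": "node-a",
    "target": "node-b",
    "type": "mentions",
    "properties": {"weight": 1},
}

# Build and unpack one edge tuple in the (source, target, type, properties) order.
source, target, edge_type, properties = (
    record["source"],
    record["target"],
    record["type"],
    record["properties"],
)
print(f"{source} -[{edge_type}]-> {target} {properties}")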


@@ -57,7 +57,7 @@ async def search(
    """
    # Use search function filtered by permissions if access control is enabled
    if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
-        return await permissions_search(
+        return await authorized_search(
            query_text, query_type, user, dataset_ids, system_prompt_path, top_k
        )
@@ -143,7 +143,7 @@ async def specific_search(
    return results


-async def permissions_search(
+async def authorized_search(
    query_text: str,
    query_type: SearchType,
    user: User = None,
@@ -190,7 +190,11 @@ async def specific_search_by_context(
        search_results = await specific_search(
            query_type, query_text, user, system_prompt_path=system_prompt_path, top_k=top_k
        )
-        return {dataset.name: search_results}
+        return {
+            "search_result": search_results,
+            "dataset_id": dataset.id,
+            "dataset_name": dataset.name,
+        }

    # Search every dataset async based on query and appropriate database configuration
    tasks = []
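Callers that previously indexed results by dataset name now receive, per dataset, a dict with an explicit id and name alongside the results. A sketch of consuming the new shape, with placeholder values (the keys follow the diff above):

# Placeholder per-dataset result in the new shape.
results = [
    {
        "search_result": ["sentence one", "sentence two"],
        "dataset_id": "placeholder-dataset-id",
        "dataset_name": "NLP",
    },
]

for entry in results:
    print(entry["dataset_name"], entry["dataset_id"])
    for item in entry["search_result"]:
        print(" -", item)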


@@ -0,0 +1,203 @@
import os
import cognee
import pathlib

from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.shared.logging_utils import get_logger
from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user, create_user
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets

logger = get_logger()


async def main():
    # Enable permissions feature
    os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"

    # Clean up test directories before starting
    data_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_permissions")
        ).resolve()
    )
    cognee_directory_path = str(
        pathlib.Path(
            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_permissions")
        ).resolve()
    )
    cognee.config.data_root_directory(data_directory_path)
    cognee.config.system_root_directory(cognee_directory_path)

    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    explanation_file_path = os.path.join(
        pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
    )

    # Add document for default user
    await cognee.add([explanation_file_path], dataset_name="NLP")
    default_user = await get_default_user()

    text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
    At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
    Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
    The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
    Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
    In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
    """

    # Add document for test user
    test_user = await create_user("user@example.com", "example")
    await cognee.add([text], dataset_name="QUANTUM", user=test_user)

    await cognee.cognify(["NLP"], user=default_user)
    await cognee.cognify(["QUANTUM"], user=test_user)

    # Check that default_user can only see information from the NLP dataset
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What is in the document?",
        user=default_user,
    )
    assert len(search_results) == 1, "The search results list length is not one."
    print("\n\nExtracted sentences are:\n")
    for result in search_results:
        print(f"{result}\n")
    assert search_results[0]["dataset_name"] == "NLP", (
        f"Dict must contain dataset name 'NLP': {search_results[0]}"
    )

    # Check that test_user can only see information from the QUANTUM dataset
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What is in the document?",
        user=test_user,
    )
    assert len(search_results) == 1, "The search results list length is not one."
    print("\n\nExtracted sentences are:\n")
    for result in search_results:
        print(f"{result}\n")
    assert search_results[0]["dataset_name"] == "QUANTUM", (
        f"Dict must contain dataset name 'QUANTUM': {search_results[0]}"
    )

    # Try to add a document with default_user to test_user's dataset (test write permission enforcement)
    test_user_dataset_id = search_results[0]["dataset_id"]
    add_error = False
    try:
        await cognee.add(
            [explanation_file_path],
            dataset_name="QUANTUM",
            dataset_id=test_user_dataset_id,
            user=default_user,
        )
    except PermissionDeniedError:
        add_error = True
    assert add_error, "PermissionDeniedError was not raised during add as expected"

    # Try to cognify test_user's dataset with default_user (test write permission enforcement)
    cognify_error = False
    try:
        await cognee.cognify(datasets=[test_user_dataset_id], user=default_user)
    except PermissionDeniedError:
        cognify_error = True
    assert cognify_error, "PermissionDeniedError was not raised during cognify as expected"

    # Try to grant permission on a dataset default_user does not have share permission for
    give_permission_error = False
    try:
        await authorized_give_permission_on_datasets(
            default_user.id,
            [test_user_dataset_id],
            "write",
            default_user.id,
        )
    except PermissionDeniedError:
        give_permission_error = True
    assert give_permission_error, (
        "PermissionDeniedError was not raised during assignment of permission as expected"
    )

    # Actually give default_user permission to write on test_user's dataset
    await authorized_give_permission_on_datasets(
        default_user.id,
        [test_user_dataset_id],
        "write",
        test_user.id,
    )

    # Add new data to test_user's dataset from default_user
    await cognee.add(
        [explanation_file_path],
        dataset_name="QUANTUM",
        dataset_id=test_user_dataset_id,
        user=default_user,
    )
    await cognee.cognify(datasets=[test_user_dataset_id], user=default_user)

    # Actually give default_user permission to read test_user's dataset
    await authorized_give_permission_on_datasets(
        default_user.id,
        [test_user_dataset_id],
        "read",
        test_user.id,
    )

    # Check that default_user can now read from test_user's dataset
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What is in the document?",
        user=default_user,
        dataset_ids=[test_user_dataset_id],
    )
    assert len(search_results) == 1, "The search results list length is not one."
    print("\n\nExtracted sentences are:\n")
    for result in search_results:
        print(f"{result}\n")
    assert search_results[0]["dataset_name"] == "QUANTUM", (
        f"Dict must contain dataset name 'QUANTUM': {search_results[0]}"
    )

    # Check that default_user can now see information from both datasets
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What is in the document?",
        user=default_user,
    )
    assert len(search_results) == 2, "The search results list length is not two."
    print("\n\nExtracted sentences are:\n")
    for result in search_results:
        print(f"{result}\n")

    # Try deleting data from test_user's dataset with default_user without delete permission
    delete_error = False
    try:
        await cognee.delete([text], dataset_id=test_user_dataset_id, user=default_user)
    except PermissionDeniedError:
        delete_error = True
    assert delete_error, "PermissionDeniedError was not raised during delete operation as expected"

    # Try deleting data from test_user's dataset with test_user
    await cognee.delete([text], dataset_id=test_user_dataset_id, user=test_user)

    # Actually give default_user permission to delete data from test_user's dataset
    await authorized_give_permission_on_datasets(
        default_user.id,
        [test_user_dataset_id],
        "delete",
        test_user.id,
    )

    # Try deleting data from test_user's dataset with default_user after getting delete permission
    await cognee.delete([explanation_file_path], dataset_id=test_user_dataset_id, user=default_user)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
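The grant calls in this test follow one argument order throughout. The roles below are inferred from those call sites rather than from documented parameter names, so treat this wrapper as a hedged reading, not the library's API:

from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets

async def grant(grantee, dataset_id, permission, owner):
    # (grantee id, dataset ids, permission name, granting user's id) —
    # roles inferred from the call sites in the test above; permission is
    # one of the strings the test uses: "read", "write", "share", "delete".
    await authorized_give_permission_on_datasets(
        grantee.id, [dataset_id], permission, owner.id
    )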

poetry.lock

@@ -1406,27 +1406,6 @@ files = [
{file = "coverage-7.9.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0b3496922cb5f4215bf5caaef4cf12364a26b0be82e9ed6d050f3352cf2d7ef0"},
{file = "coverage-7.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9565c3ab1c93310569ec0d86b017f128f027cab0b622b7af288696d7ed43a16d"},
{file = "coverage-7.9.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2241ad5dbf79ae1d9c08fe52b36d03ca122fb9ac6bca0f34439e99f8327ac89f"},
{file = "coverage-7.9.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bb5838701ca68b10ebc0937dbd0eb81974bac54447c55cd58dea5bca8451029"},
{file = "coverage-7.9.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b30a25f814591a8c0c5372c11ac8967f669b97444c47fd794926e175c4047ece"},
{file = "coverage-7.9.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2d04b16a6062516df97969f1ae7efd0de9c31eb6ebdceaa0d213b21c0ca1a683"},
{file = "coverage-7.9.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7931b9e249edefb07cd6ae10c702788546341d5fe44db5b6108a25da4dca513f"},
{file = "coverage-7.9.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52e92b01041151bf607ee858e5a56c62d4b70f4dac85b8c8cb7fb8a351ab2c10"},
{file = "coverage-7.9.1-cp313-cp313t-win32.whl", hash = "sha256:684e2110ed84fd1ca5f40e89aa44adf1729dc85444004111aa01866507adf363"},
{file = "coverage-7.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:437c576979e4db840539674e68c84b3cda82bc824dd138d56bead1435f1cb5d7"},
{file = "coverage-7.9.1-cp313-cp313t-win_arm64.whl", hash = "sha256:18a0912944d70aaf5f399e350445738a1a20b50fbea788f640751c2ed9208b6c"},
{file = "coverage-7.9.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f424507f57878e424d9a95dc4ead3fbdd72fd201e404e861e465f28ea469951"},
{file = "coverage-7.9.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:535fde4001b2783ac80865d90e7cc7798b6b126f4cd8a8c54acfe76804e54e58"},
{file = "coverage-7.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02532fd3290bb8fa6bec876520842428e2a6ed6c27014eca81b031c2d30e3f71"},
{file = "coverage-7.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56f5eb308b17bca3bbff810f55ee26d51926d9f89ba92707ee41d3c061257e55"},
{file = "coverage-7.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfa447506c1a52271f1b0de3f42ea0fa14676052549095e378d5bff1c505ff7b"},
{file = "coverage-7.9.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9ca8e220006966b4a7b68e8984a6aee645a0384b0769e829ba60281fe61ec4f7"},
{file = "coverage-7.9.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:49f1d0788ba5b7ba65933f3a18864117c6506619f5ca80326b478f72acf3f385"},
{file = "coverage-7.9.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:68cd53aec6f45b8e4724c0950ce86eacb775c6be01ce6e3669fe4f3a21e768ed"},
{file = "coverage-7.9.1-cp39-cp39-win32.whl", hash = "sha256:95335095b6c7b1cc14c3f3f17d5452ce677e8490d101698562b2ffcacc304c8d"},
{file = "coverage-7.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:e1b5191d1648acc439b24721caab2fd0c86679d8549ed2c84d5a7ec1bedcc244"},
{file = "coverage-7.9.1-pp39.pp310.pp311-none-any.whl", hash = "sha256:db0f04118d1db74db6c9e1cb1898532c7dcc220f1d2718f058601f7c3f499514"},
{file = "coverage-7.9.1-py3-none-any.whl", hash = "sha256:66b974b145aa189516b6bf2d8423e888b742517d37872f6ee4c5be0073bd9a3c"},
{file = "coverage-7.9.1.tar.gz", hash = "sha256:6cf43c78c4282708a28e466316935ec7489a9c487518a77fa68f716c67909cec"},
]
[package.dependencies]


@@ -1,6 +1,6 @@
[project]
name = "cognee"
-version = "0.2.0"
+version = "0.2.1-dev"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = [
    { name = "Vasilije Markovic" },

uv.lock

@@ -866,7 +866,7 @@ wheels = [
[[package]]
name = "cognee"
-version = "0.2.0"
+version = "0.2.1.dev0"
source = { editable = "." }
dependencies = [
    { name = "aiofiles" },