Main merge vol7 (#1509)

<!-- .github/pull_request_template.md -->

## Description
Merge main to dev

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
Igor Ilic 2025-10-07 20:17:30 +02:00 committed by GitHub
commit f81d427466
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 32 additions and 1540 deletions

View file

@ -58,7 +58,7 @@ body:
- Python version: [e.g. 3.9.0] - Python version: [e.g. 3.9.0]
- Cognee version: [e.g. 0.1.0] - Cognee version: [e.g. 0.1.0]
- LLM Provider: [e.g. OpenAI, Ollama] - LLM Provider: [e.g. OpenAI, Ollama]
- Database: [e.g. Neo4j, FalkorDB] - Database: [e.g. Neo4j]
validations: validations:
required: true required: true

View file

@ -76,6 +76,9 @@ Get started quickly with a Google Colab <a href="https://colab.research.google.
## About cognee ## About cognee
cognee works locally and stores your data on your device.
Our hosted solution is just our deployment of OSS cognee on Modal, with the goal of making development and productionization easier.
Self-hosted package: Self-hosted package:
- Interconnects any kind of documents: past conversations, files, images, and audio transcriptions - Interconnects any kind of documents: past conversations, files, images, and audio transcriptions

View file

@ -37,4 +37,5 @@ dev = [
allow-direct-references = true allow-direct-references = true
[project.scripts] [project.scripts]
cognee-mcp = "src:main" cognee = "src:main"
cognee-mcp = "src:main_mcp"

View file

@ -1,8 +1,28 @@
from .server import main as server_main from .server import main as server_main
import warnings
def main(): def main():
"""Main entry point for the package.""" """Deprecated main entry point for the package."""
import asyncio
# Show deprecation warning
warnings.warn(
"The 'cognee' command for cognee-mcp is deprecated and will be removed in a future version. "
"Please use 'cognee-mcp' instead to avoid conflicts with the main cognee library.",
DeprecationWarning,
stacklevel=2,
)
print("⚠️ DEPRECATION WARNING: Use 'cognee-mcp' command instead of 'cognee'")
print(" This avoids conflicts with the main cognee library.")
print()
asyncio.run(server_main())
def main_mcp():
"""Clean main entry point for cognee-mcp command."""
import asyncio import asyncio
asyncio.run(server_main()) asyncio.run(server_main())

View file

@ -44,16 +44,14 @@ def create_graph_engine(
Parameters: Parameters:
----------- -----------
- graph_database_provider: The type of graph database provider to use (e.g., neo4j, - graph_database_provider: The type of graph database provider to use (e.g., neo4j, falkor, kuzu).
falkordb, kuzu). - graph_database_url: The URL for the graph database instance. Required for neo4j and falkordb providers.
- graph_database_url: The URL for the graph database instance. Required for neo4j
and falkordb providers.
- graph_database_username: The username for authentication with the graph database. - graph_database_username: The username for authentication with the graph database.
Required for neo4j provider. Required for neo4j provider.
- graph_database_password: The password for authentication with the graph database. - graph_database_password: The password for authentication with the graph database.
Required for neo4j provider. Required for neo4j provider.
- graph_database_port: The port number for the graph database connection. Required - graph_database_port: The port number for the graph database connection. Required
for the falkordb provider. for the falkordb provider
- graph_file_path: The filesystem path to the graph file. Required for the kuzu - graph_file_path: The filesystem path to the graph file. Required for the kuzu
provider. provider.
@ -86,21 +84,6 @@ def create_graph_engine(
graph_database_name=graph_database_name or None, graph_database_name=graph_database_name or None,
) )
elif graph_database_provider == "falkordb":
if not (graph_database_url and graph_database_port):
raise EnvironmentError("Missing required FalkorDB credentials.")
from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
from cognee.infrastructure.databases.hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter
embedding_engine = get_embedding_engine()
return FalkorDBAdapter(
database_url=graph_database_url,
database_port=graph_database_port,
embedding_engine=embedding_engine,
)
elif graph_database_provider == "kuzu": elif graph_database_provider == "kuzu":
if not graph_file_path: if not graph_file_path:
raise EnvironmentError("Missing required Kuzu database path.") raise EnvironmentError("Missing required Kuzu database path.")
@ -179,5 +162,5 @@ def create_graph_engine(
raise EnvironmentError( raise EnvironmentError(
f"Unsupported graph database provider: {graph_database_provider}. " f"Unsupported graph database provider: {graph_database_provider}. "
f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'falkordb', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}" f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
) )

View file

@ -19,8 +19,7 @@ def create_vector_engine(
for each provider, raising an EnvironmentError if any are missing, or ImportError if the for each provider, raising an EnvironmentError if any are missing, or ImportError if the
ChromaDB package is not installed. ChromaDB package is not installed.
Supported providers include: pgvector, FalkorDB, ChromaDB, and Supported providers include: pgvector, ChromaDB, and LanceDB.
LanceDB.
Parameters: Parameters:
----------- -----------
@ -79,18 +78,6 @@ def create_vector_engine(
embedding_engine, embedding_engine,
) )
elif vector_db_provider == "falkordb":
if not (vector_db_url and vector_db_port):
raise EnvironmentError("Missing requred FalkorDB credentials!")
from ..hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter
return FalkorDBAdapter(
database_url=vector_db_url,
database_port=vector_db_port,
embedding_engine=embedding_engine,
)
elif vector_db_provider == "chromadb": elif vector_db_provider == "chromadb":
try: try:
import chromadb import chromadb

View file

@ -288,7 +288,6 @@ class SummarizedCode(BaseModel):
class GraphDBType(Enum): class GraphDBType(Enum):
NETWORKX = auto() NETWORKX = auto()
NEO4J = auto() NEO4J = auto()
FALKORDB = auto()
KUZU = auto() KUZU = auto()

View file

@ -1,174 +0,0 @@
import os
import cognee
import pathlib
from cognee.infrastructure.files.storage import get_storage_config
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
from cognee.modules.search.types import SearchType
logger = get_logger()
async def check_falkordb_connection():
    """Probe for a FalkorDB server on localhost:6379; True when reachable."""
    try:
        from falkordb import FalkorDB

        # list_graphs() forces a round trip, which proves the connection works.
        FalkorDB(host="localhost", port=6379).list_graphs()
    except Exception as e:
        logger.warning(f"FalkorDB not available at localhost:6379: {e}")
        return False
    return True
async def main():
# End-to-end FalkorDB smoke test: point cognee's graph AND vector stores at a
# local FalkorDB instance (it is a hybrid adapter), ingest sample data, run
# cognify plus several searches, then verify that pruning removes everything.
# NOTE(review): indentation was lost in this dump; code lines are kept verbatim.
# Check if FalkorDB is available
if not await check_falkordb_connection():
print("⚠️ FalkorDB is not available at localhost:6379")
print(" To run this test, start FalkorDB server:")
print(" docker run -p 6379:6379 falkordb/falkordb:latest")
print(" Skipping FalkorDB test...")
return
print("✅ FalkorDB connection successful, running test...")
# Configure FalkorDB as the graph database provider
cognee.config.set_graph_db_config(
{
"graph_database_url": "localhost", # FalkorDB URL (using Redis protocol)
"graph_database_port": 6379,
"graph_database_provider": "falkordb",
}
)
# Configure FalkorDB as the vector database provider too since it's a hybrid adapter
cognee.config.set_vector_db_config(
{
"vector_db_url": "localhost",
"vector_db_port": 6379,
"vector_db_provider": "falkordb",
}
)
# Isolate this test's data and system directories under the test folder so
# the cleanup assertions at the end only see this run's artifacts.
data_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_falkordb")
).resolve()
)
cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_falkordb")
).resolve()
)
cognee.config.system_root_directory(cognee_directory_path)
# Start from a clean slate before ingesting.
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
# Ingest a PDF and a raw text snippet into a single dataset.
dataset_name = "artificial_intelligence"
ai_text_file_path = os.path.join(
pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
)
await cognee.add([ai_text_file_path], dataset_name)
text = """A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process. LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word.
LLMs are artificial neural networks. The largest and most capable, as of March 2024, are built with a decoder-only transformer-based architecture while some recent implementations are based on other architectures, such as recurrent neural network variants and Mamba (a state space model).
Up to 2020, fine tuning was the only way a model could be adapted to be able to accomplish specific tasks. Larger sized models, such as GPT-3, however, can be prompt-engineered to achieve similar results.[6] They are thought to acquire knowledge about syntax, semantics and "ontology" inherent in human language corpora, but also inaccuracies and biases present in the corpora.
Some notable LLMs are OpenAI's GPT series of models (e.g., GPT-3.5 and GPT-4, used in ChatGPT and Microsoft Copilot), Google's PaLM and Gemini (the latter of which is currently used in the chatbot of the same name), xAI's Grok, Meta's LLaMA family of open-source models, Anthropic's Claude models, Mistral AI's open source models, and Databricks' open source DBRX.
"""
await cognee.add([text], dataset_name)
await cognee.cognify([dataset_name])
from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine()
# Use a real vector hit as the query seed so the searches below have a
# guaranteed-relevant query string.
random_node = (await vector_engine.search("entity.name", "AI"))[0]
random_node_name = random_node.payload["text"]
search_results = await cognee.search(
query_type=SearchType.INSIGHTS, query_text=random_node_name
)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted sentences are:\n")
for result in search_results:
print(f"{result}\n")
search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
assert len(search_results) != 0, "The search results list is empty."
print("\n\nExtracted chunks are:\n")
for result in search_results:
print(f"{result}\n")
search_results = await cognee.search(
query_type=SearchType.SUMMARIES, query_text=random_node_name
)
assert len(search_results) != 0, "Query related summaries don't exist."
print("\nExtracted summaries are:\n")
for result in search_results:
print(f"{result}\n")
# Three searches ran above; 6 history rows are expected — presumably one
# query row plus one result row per search; verify against get_history.
user = await get_default_user()
history = await get_history(user.id)
assert len(history) == 6, "Search history is not correct."
# Assert local data files are cleaned properly
await cognee.prune.prune_data()
data_root_directory = get_storage_config()["data_root_directory"]
assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
# Assert relational, vector and graph databases have been cleaned properly
await cognee.prune.prune_system(metadata=True)
# For FalkorDB vector engine, check if collections are empty
# Since FalkorDB is a hybrid adapter, we can check if the graph is empty
# as the vector data is stored in the same graph
if hasattr(vector_engine, "driver"):
# This is FalkorDB - check if graphs exist
collections = vector_engine.driver.list_graphs()
# The graph should be deleted, so either no graphs or empty graph
if vector_engine.graph_name in collections:
# Graph exists but should be empty
vector_graph_data = await vector_engine.get_graph_data()
vector_nodes, vector_edges = vector_graph_data
assert len(vector_nodes) == 0 and len(vector_edges) == 0, (
"FalkorDB vector database is not empty"
)
else:
# Fallback for other vector engines like LanceDB
connection = await vector_engine.get_connection()
collection_names = await connection.table_names()
assert len(collection_names) == 0, "Vector database is not empty"
from cognee.infrastructure.databases.relational import get_relational_engine
# NOTE(review): assumes the relational engine is SQLite (has .db_path) — confirm.
assert not os.path.exists(get_relational_engine().db_path), (
"SQLite relational database is not empty"
)
# For FalkorDB, check if the graph database is empty
from cognee.infrastructure.databases.graph import get_graph_engine
graph_engine = get_graph_engine()
graph_data = await graph_engine.get_graph_data()
nodes, edges = graph_data
assert len(nodes) == 0 and len(edges) == 0, "FalkorDB graph database is not empty"
print("🎉 FalkorDB test completed successfully!")
print(" ✓ Data ingestion worked")
print(" ✓ Cognify processing worked")
print(" ✓ Search operations worked")
print(" ✓ Cleanup worked")
# Script entry point; debug=True enables asyncio's slow-callback/exception logging.
if __name__ == "__main__":
import asyncio
asyncio.run(main(), debug=True)

View file

@ -95,7 +95,6 @@ mistral = ["mistral-common>=1.5.2,<2"]
anthropic = ["anthropic>=0.26.1,<0.27"] anthropic = ["anthropic>=0.26.1,<0.27"]
deepeval = ["deepeval>=2.0.1,<3"] deepeval = ["deepeval>=2.0.1,<3"]
posthog = ["posthog>=3.5.0,<4"] posthog = ["posthog>=3.5.0,<4"]
falkordb = ["falkordb>=1.0.9,<2.0.0"]
groq = ["groq>=0.8.0,<1.0.0"] groq = ["groq>=0.8.0,<1.0.0"]
chromadb = [ chromadb = [
"chromadb>=0.3.0,<0.7", "chromadb>=0.3.0,<0.7",

View file

@ -96,17 +96,6 @@ services:
networks: networks:
- cognee-network - cognee-network
falkordb:
image: falkordb/falkordb:edge
container_name: falkordb
profiles:
- falkordb
ports:
- 6379:6379
- 3001:3000
networks:
- cognee-network
chromadb: chromadb:
image: chromadb/chroma:0.6.3 image: chromadb/chroma:0.6.3
container_name: chromadb container_name: chromadb

View file

@ -1,87 +0,0 @@
import os
import pathlib
import asyncio
import cognee
from cognee.modules.search.types import SearchType
async def main():
"""
Example script demonstrating how to use Cognee with FalkorDB
This example:
1. Configures Cognee to use FalkorDB as graph database
2. Sets up data directories
3. Adds sample data to Cognee
4. Processes (cognifies) the data
5. Performs different types of searches
"""
# NOTE(review): indentation was lost in this dump; code lines are kept verbatim.
# Configure FalkorDB as the graph database provider
cognee.config.set_graph_db_config(
{
"graph_database_url": "localhost", # FalkorDB URL (using Redis protocol)
"graph_database_port": 6379,
"graph_database_provider": "falkordb",
}
)
# Set up data directories for storing documents and system files
# You should adjust these paths to your needs
current_dir = pathlib.Path(__file__).parent
data_directory_path = str(current_dir / "data_storage")
cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(current_dir / "cognee_system")
cognee.config.system_root_directory(cognee_directory_path)
# Clean any existing data (optional)
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
# Create a dataset
dataset_name = "falkordb_example"
# Add sample text to the dataset
sample_text = """FalkorDB is a graph database that evolved from RedisGraph.
It is focused on providing high-performance graph operations.
FalkorDB uses sparse adjacency matrices to represent the graph data structure.
It supports the Cypher query language for querying graph data.
FalkorDB can be integrated with vector search capabilities for AI applications.
It provides a Redis module, allowing users to leverage Redis's features alongside graph capabilities."""
# Add the sample text to the dataset
await cognee.add([sample_text], dataset_name)
# Process the added document to extract knowledge
await cognee.cognify([dataset_name])
# Now let's perform some searches
# 1. Search for insights related to "FalkorDB"
insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="FalkorDB")
print("\nInsights about FalkorDB:")
for result in insights_results:
print(f"- {result}")
# 2. Search for text chunks related to "graph database"
# (scoped to this dataset via the datasets= argument)
chunks_results = await cognee.search(
query_type=SearchType.CHUNKS, query_text="graph database", datasets=[dataset_name]
)
print("\nChunks about graph database:")
for result in chunks_results:
print(f"- {result}")
# 3. Get graph completion related to databases
graph_completion_results = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION, query_text="database"
)
print("\nGraph completion for databases:")
for result in graph_completion_results:
print(f"- {result}")
# Clean up (optional)
# await cognee.prune.prune_data()
# await cognee.prune.prune_system(metadata=True)
# Script entry point.
if __name__ == "__main__":
asyncio.run(main())

View file

@ -88,7 +88,6 @@ mistral = ["mistral-common>=1.5.2,<2"]
anthropic = ["anthropic>=0.27"] anthropic = ["anthropic>=0.27"]
deepeval = ["deepeval>=3.0.1,<4"] deepeval = ["deepeval>=3.0.1,<4"]
posthog = ["posthog>=3.5.0,<4"] posthog = ["posthog>=3.5.0,<4"]
falkordb = ["falkordb>=1.0.9,<2.0.0"]
groq = ["groq>=0.8.0,<1.0.0"] groq = ["groq>=0.8.0,<1.0.0"]
chromadb = [ chromadb = [
"chromadb>=0.6,<0.7", "chromadb>=0.6,<0.7",