Merge branch 'main' into main-merge-vol9

This commit is contained in:
Igor Ilic 2026-01-13 14:22:22 +01:00
commit b689d330ac
23 changed files with 5270 additions and 7068 deletions

View file

@ -76,7 +76,7 @@ git clone https://github.com/<your-github-username>/cognee.git
cd cognee cd cognee
``` ```
In case you are working on Vector and Graph Adapters In case you are working on Vector and Graph Adapters
1. Fork the [**cognee**](https://github.com/topoteretes/cognee-community) repository 1. Fork the [**cognee-community**](https://github.com/topoteretes/cognee-community) repository
2. Clone your fork: 2. Clone your fork:
```shell ```shell
git clone https://github.com/<your-github-username>/cognee-community.git git clone https://github.com/<your-github-username>/cognee-community.git
@ -120,6 +120,21 @@ or
uv run python examples/python/simple_example.py uv run python examples/python/simple_example.py
``` ```
### Running Simple Example
Rename `.env.example` to `.env` and provide your OpenAI API key as `LLM_API_KEY`.
Make sure to run `uv sync` in the root of the cloned repository, or set up a virtual environment, before running cognee.
```shell
python cognee/cognee/examples/python/simple_example.py
```
or
```shell
uv run python cognee/cognee/examples/python/simple_example.py
```
## 4. 📤 Submitting Changes ## 4. 📤 Submitting Changes
1. Make sure that `pre-commit` and hooks are installed. See `Required tools` section for more information. Try executing `pre-commit run` if you are not sure. 1. Make sure that `pre-commit` and hooks are installed. See `Required tools` section for more information. Try executing `pre-commit run` if you are not sure.

View file

@ -126,6 +126,7 @@ Now, run a minimal pipeline:
```python ```python
import cognee import cognee
import asyncio import asyncio
from pprint import pprint
async def main(): async def main():
@ -143,7 +144,7 @@ async def main():
# Display the results # Display the results
for result in results: for result in results:
print(result) pprint(result)
if __name__ == '__main__': if __name__ == '__main__':

File diff suppressed because it is too large Load diff

View file

@ -13,7 +13,7 @@
"classnames": "^2.5.1", "classnames": "^2.5.1",
"culori": "^4.0.1", "culori": "^4.0.1",
"d3-force-3d": "^3.0.6", "d3-force-3d": "^3.0.6",
"next": "^16.1.7", "next": "^16.1.1",
"react": "^19.2.3", "react": "^19.2.3",
"react-dom": "^19.2.3", "react-dom": "^19.2.3",
"react-force-graph-2d": "^1.27.1", "react-force-graph-2d": "^1.27.1",

View file

@ -151,7 +151,7 @@ class CogneeClient:
query_type: str, query_type: str,
datasets: Optional[List[str]] = None, datasets: Optional[List[str]] = None,
system_prompt: Optional[str] = None, system_prompt: Optional[str] = None,
top_k: int = 10, top_k: int = 5,
) -> Any: ) -> Any:
""" """
Search the knowledge graph. Search the knowledge graph.
@ -192,7 +192,7 @@ class CogneeClient:
with redirect_stdout(sys.stderr): with redirect_stdout(sys.stderr):
results = await self.cognee.search( results = await self.cognee.search(
query_type=SearchType[query_type.upper()], query_text=query_text query_type=SearchType[query_type.upper()], query_text=query_text, top_k=top_k
) )
return results return results

View file

@ -316,7 +316,7 @@ async def save_interaction(data: str) -> list:
@mcp.tool() @mcp.tool()
async def search(search_query: str, search_type: str) -> list: async def search(search_query: str, search_type: str, top_k: int = 10) -> list:
""" """
Search and query the knowledge graph for insights, information, and connections. Search and query the knowledge graph for insights, information, and connections.
@ -389,6 +389,13 @@ async def search(search_query: str, search_type: str) -> list:
The search_type is case-insensitive and will be converted to uppercase. The search_type is case-insensitive and will be converted to uppercase.
top_k : int, optional
Maximum number of results to return (default: 10).
Controls the amount of context retrieved from the knowledge graph.
- Lower values (3-5): Faster, more focused results
- Higher values (10-20): More comprehensive, but slower and more context-heavy
Helps manage response size and context window usage in MCP clients.
Returns Returns
------- -------
list list
@ -425,13 +432,32 @@ async def search(search_query: str, search_type: str) -> list:
""" """
async def search_task(search_query: str, search_type: str) -> str: async def search_task(search_query: str, search_type: str, top_k: int) -> str:
"""Search the knowledge graph""" """
Internal task to execute knowledge graph search with result formatting.
Handles the actual search execution and formats results appropriately
for MCP clients based on the search type and execution mode (API vs direct).
Parameters
----------
search_query : str
The search query in natural language
search_type : str
Type of search to perform (GRAPH_COMPLETION, CHUNKS, etc.)
top_k : int
Maximum number of results to return
Returns
-------
str
Formatted search results as a string, with format depending on search_type
"""
# NOTE: MCP uses stdout to communicate, we must redirect all output # NOTE: MCP uses stdout to communicate, we must redirect all output
# going to stdout ( like the print function ) to stderr. # going to stdout ( like the print function ) to stderr.
with redirect_stdout(sys.stderr): with redirect_stdout(sys.stderr):
search_results = await cognee_client.search( search_results = await cognee_client.search(
query_text=search_query, query_type=search_type query_text=search_query, query_type=search_type, top_k=top_k
) )
# Handle different result formats based on API vs direct mode # Handle different result formats based on API vs direct mode
@ -465,7 +491,7 @@ async def search(search_query: str, search_type: str) -> list:
else: else:
return str(search_results) return str(search_results)
search_results = await search_task(search_query, search_type) search_results = await search_task(search_query, search_type, top_k)
return [types.TextContent(type="text", text=search_results)] return [types.TextContent(type="text", text=search_results)]

View file

@ -36,6 +36,7 @@ async def search(
session_id: Optional[str] = None, session_id: Optional[str] = None,
wide_search_top_k: Optional[int] = 100, wide_search_top_k: Optional[int] = 100,
triplet_distance_penalty: Optional[float] = 3.5, triplet_distance_penalty: Optional[float] = 3.5,
verbose: bool = False,
) -> Union[List[SearchResult], CombinedSearchResult]: ) -> Union[List[SearchResult], CombinedSearchResult]:
""" """
Search and query the knowledge graph for insights, information, and connections. Search and query the knowledge graph for insights, information, and connections.
@ -126,6 +127,8 @@ async def search(
session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None. session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.
verbose: If True, returns detailed result information including graph representation (when possible).
Returns: Returns:
list: Search results in format determined by query_type: list: Search results in format determined by query_type:
@ -218,6 +221,7 @@ async def search(
session_id=session_id, session_id=session_id,
wide_search_top_k=wide_search_top_k, wide_search_top_k=wide_search_top_k,
triplet_distance_penalty=triplet_distance_penalty, triplet_distance_penalty=triplet_distance_penalty,
verbose=verbose,
) )
return filtered_search_results return filtered_search_results

View file

@ -15,3 +15,9 @@ async def setup():
""" """
await create_relational_db_and_tables() await create_relational_db_and_tables()
await create_pgvector_db_and_tables() await create_pgvector_db_and_tables()
if __name__ == "__main__":
import asyncio
asyncio.run(setup())

View file

@ -49,6 +49,7 @@ async def search(
session_id: Optional[str] = None, session_id: Optional[str] = None,
wide_search_top_k: Optional[int] = 100, wide_search_top_k: Optional[int] = 100,
triplet_distance_penalty: Optional[float] = 3.5, triplet_distance_penalty: Optional[float] = 3.5,
verbose: bool = False,
) -> Union[CombinedSearchResult, List[SearchResult]]: ) -> Union[CombinedSearchResult, List[SearchResult]]:
""" """
@ -140,6 +141,7 @@ async def search(
) )
if use_combined_context: if use_combined_context:
# Note: combined context search must always be verbose and return a CombinedSearchResult with graphs info
prepared_search_results = await prepare_search_result( prepared_search_results = await prepare_search_result(
search_results[0] if isinstance(search_results, list) else search_results search_results[0] if isinstance(search_results, list) else search_results
) )
@ -173,25 +175,30 @@ async def search(
datasets = prepared_search_results["datasets"] datasets = prepared_search_results["datasets"]
if only_context: if only_context:
return_value.append( search_result_dict = {
{ "search_result": [context] if context else None,
"search_result": [context] if context else None, "dataset_id": datasets[0].id,
"dataset_id": datasets[0].id, "dataset_name": datasets[0].name,
"dataset_name": datasets[0].name, "dataset_tenant_id": datasets[0].tenant_id,
"dataset_tenant_id": datasets[0].tenant_id, }
"graphs": graphs, if verbose:
} # Include graphs only in verbose mode
) search_result_dict["graphs"] = graphs
return_value.append(search_result_dict)
else: else:
return_value.append( search_result_dict = {
{ "search_result": [result] if result else None,
"search_result": [result] if result else None, "dataset_id": datasets[0].id,
"dataset_id": datasets[0].id, "dataset_name": datasets[0].name,
"dataset_name": datasets[0].name, "dataset_tenant_id": datasets[0].tenant_id,
"dataset_tenant_id": datasets[0].tenant_id, }
"graphs": graphs, if verbose:
} # Include graphs only in verbose mode
) search_result_dict["graphs"] = graphs
return_value.append(search_result_dict)
return return_value return return_value
else: else:
return_value = [] return_value = []

View file

@ -92,7 +92,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
} }
links_list.append(link_data) links_list.append(link_data)
html_template = """ html_template = r"""
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>

View file

@ -129,14 +129,32 @@ async def test_search_access_control_returns_dataset_shaped_dicts(monkeypatch, s
monkeypatch.setattr(search_mod, "backend_access_control_enabled", lambda: True) monkeypatch.setattr(search_mod, "backend_access_control_enabled", lambda: True)
monkeypatch.setattr(search_mod, "authorized_search", dummy_authorized_search) monkeypatch.setattr(search_mod, "authorized_search", dummy_authorized_search)
out = await search_mod.search( out_non_verbose = await search_mod.search(
query_text="q", query_text="q",
query_type=SearchType.CHUNKS, query_type=SearchType.CHUNKS,
dataset_ids=[ds.id], dataset_ids=[ds.id],
user=user, user=user,
verbose=False,
) )
assert out == [ assert out_non_verbose == [
{
"search_result": ["r"],
"dataset_id": ds.id,
"dataset_name": "ds1",
"dataset_tenant_id": "t1",
}
]
out_verbose = await search_mod.search(
query_text="q",
query_type=SearchType.CHUNKS,
dataset_ids=[ds.id],
user=user,
verbose=True,
)
assert out_verbose == [
{ {
"search_result": ["r"], "search_result": ["r"],
"dataset_id": ds.id, "dataset_id": ds.id,

View file

@ -20,19 +20,29 @@ echo "HTTP port: $HTTP_PORT"
# smooth redeployments and container restarts while maintaining data integrity. # smooth redeployments and container restarts while maintaining data integrity.
echo "Running database migrations..." echo "Running database migrations..."
set +e # Disable exit on error to handle specific migration errors
MIGRATION_OUTPUT=$(alembic upgrade head) MIGRATION_OUTPUT=$(alembic upgrade head)
MIGRATION_EXIT_CODE=$? MIGRATION_EXIT_CODE=$?
set -e
if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then
if [[ "$MIGRATION_OUTPUT" == *"UserAlreadyExists"* ]] || [[ "$MIGRATION_OUTPUT" == *"User default_user@example.com already exists"* ]]; then if [[ "$MIGRATION_OUTPUT" == *"UserAlreadyExists"* ]] || [[ "$MIGRATION_OUTPUT" == *"User default_user@example.com already exists"* ]]; then
echo "Warning: Default user already exists, continuing startup..." echo "Warning: Default user already exists, continuing startup..."
else else
echo "Migration failed with unexpected error." echo "Migration failed with unexpected error. Trying to run Cognee without migrations."
exit 1
fi
fi
echo "Database migrations done." echo "Initializing database tables..."
python /app/cognee/modules/engine/operations/setup.py
INIT_EXIT_CODE=$?
if [[ $INIT_EXIT_CODE -ne 0 ]]; then
echo "Database initialization failed!"
exit 1
fi
fi
else
echo "Database migrations done."
fi
echo "Starting server..." echo "Starting server..."

View file

@ -1,8 +1,9 @@
import asyncio import asyncio
import cognee import cognee
import os import os
from pprint import pprint
# By default cognee uses OpenAI's gpt-5-mini LLM model # By default cognee uses OpenAI's gpt-5-mini LLM model
# Provide your OpenAI LLM API KEY # Provide your OpenAI LLM API KEY
os.environ["LLM_API_KEY"] = "" os.environ["LLM_API_KEY"] = ""
@ -24,13 +25,13 @@ async def cognee_demo():
# Query Cognee for information from provided document # Query Cognee for information from provided document
answer = await cognee.search("List me all the important characters in Alice in Wonderland.") answer = await cognee.search("List me all the important characters in Alice in Wonderland.")
print(answer) pprint(answer)
answer = await cognee.search("How did Alice end up in Wonderland?") answer = await cognee.search("How did Alice end up in Wonderland?")
print(answer) pprint(answer)
answer = await cognee.search("Tell me about Alice's personality.") answer = await cognee.search("Tell me about Alice's personality.")
print(answer) pprint(answer)
# Cognee is an async library, it has to be called in an async context # Cognee is an async library, it has to be called in an async context

View file

@ -1,4 +1,5 @@
import asyncio import asyncio
from pprint import pprint
import cognee import cognee
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
@ -187,7 +188,7 @@ async def main(enable_steps):
search_results = await cognee.search( search_results = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?" query_type=SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"
) )
print(search_results) pprint(search_results)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,6 +1,8 @@
import os import os
import asyncio import asyncio
import pathlib import pathlib
from pprint import pprint
from cognee.shared.logging_utils import setup_logging, ERROR from cognee.shared.logging_utils import setup_logging, ERROR
import cognee import cognee
@ -42,7 +44,7 @@ async def main():
# Display search results # Display search results
for result_text in search_results: for result_text in search_results:
print(result_text) pprint(result_text)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,5 +1,6 @@
import asyncio import asyncio
import os import os
from pprint import pprint
import cognee import cognee
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
@ -77,7 +78,7 @@ async def main():
query_type=SearchType.GRAPH_COMPLETION, query_type=SearchType.GRAPH_COMPLETION,
query_text="What are the exact cars and their types produced by Audi?", query_text="What are the exact cars and their types produced by Audi?",
) )
print(search_results) pprint(search_results)
await visualize_graph() await visualize_graph()

View file

@ -1,6 +1,7 @@
import os import os
import cognee import cognee
import pathlib import pathlib
from pprint import pprint
from cognee.modules.users.exceptions import PermissionDeniedError from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.modules.users.tenants.methods import select_tenant from cognee.modules.users.tenants.methods import select_tenant
@ -86,7 +87,7 @@ async def main():
) )
print("\nSearch results as user_1 on dataset owned by user_1:") print("\nSearch results as user_1 on dataset owned by user_1:")
for result in search_results: for result in search_results:
print(f"{result}\n") pprint(result)
# But user_1 can't read the dataset owned by user_2 (QUANTUM dataset) # But user_1 can't read the dataset owned by user_2 (QUANTUM dataset)
print("\nSearch result as user_1 on the dataset owned by user_2:") print("\nSearch result as user_1 on the dataset owned by user_2:")
@ -134,7 +135,7 @@ async def main():
dataset_ids=[quantum_dataset_id], dataset_ids=[quantum_dataset_id],
) )
for result in search_results: for result in search_results:
print(f"{result}\n") pprint(result)
# If we'd like for user_1 to add new documents to the QUANTUM dataset owned by user_2, user_1 would have to get # If we'd like for user_1 to add new documents to the QUANTUM dataset owned by user_2, user_1 would have to get
# "write" access permission, which user_1 currently does not have # "write" access permission, which user_1 currently does not have
@ -217,7 +218,7 @@ async def main():
dataset_ids=[quantum_cognee_lab_dataset_id], dataset_ids=[quantum_cognee_lab_dataset_id],
) )
for result in search_results: for result in search_results:
print(f"{result}\n") pprint(result)
# Note: All of these function calls and the permission system are available through our backend endpoints as well # Note: All of these function calls and the permission system are available through our backend endpoints as well

View file

@ -1,4 +1,6 @@
import asyncio import asyncio
from pprint import pprint
import cognee import cognee
from cognee.modules.engine.operations.setup import setup from cognee.modules.engine.operations.setup import setup
from cognee.modules.users.methods import get_default_user from cognee.modules.users.methods import get_default_user
@ -71,7 +73,7 @@ async def main():
print("Search results:") print("Search results:")
# Display results # Display results
for result_text in search_results: for result_text in search_results:
print(result_text) pprint(result_text)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,4 +1,6 @@
import asyncio import asyncio
from pprint import pprint
import cognee import cognee
from cognee.shared.logging_utils import setup_logging, ERROR from cognee.shared.logging_utils import setup_logging, ERROR
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
@ -54,7 +56,7 @@ async def main():
print("Search results:") print("Search results:")
# Display results # Display results
for result_text in search_results: for result_text in search_results:
print(result_text) pprint(result_text)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,4 +1,5 @@
import asyncio import asyncio
from pprint import pprint
import cognee import cognee
from cognee.shared.logging_utils import setup_logging, INFO from cognee.shared.logging_utils import setup_logging, INFO
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
@ -87,7 +88,8 @@ async def main():
top_k=15, top_k=15,
) )
print(f"Query: {query_text}") print(f"Query: {query_text}")
print(f"Results: {search_results}\n") print("Results:")
pprint(search_results)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,4 +1,5 @@
import asyncio import asyncio
from pprint import pprint
import cognee import cognee
from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
@ -65,7 +66,7 @@ async def main():
query_type=SearchType.TRIPLET_COMPLETION, query_type=SearchType.TRIPLET_COMPLETION,
query_text="What are the models produced by Volkswagen based on the context?", query_text="What are the models produced by Volkswagen based on the context?",
) )
print(search_results) pprint(search_results)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,7 +1,7 @@
[project] [project]
name = "cognee" name = "cognee"
version = "0.5.1.dev0" version = "0.5.1"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning." description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = [ authors = [
{ name = "Vasilije Markovic" }, { name = "Vasilije Markovic" },

9461
uv.lock generated

File diff suppressed because it is too large Load diff