Merge branch 'main' into main-merge-vol9

2026-01-13 14:22:22 +01:00 · 2026-01-13 14:22:22 +01:00 · b689d330ac
commit b689d330ac
parent ab990f7c5c 8a96a351e2
23 changed files with 5270 additions and 7068 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -76,7 +76,7 @@ git clone https://github.com/<your-github-username>/cognee.git
 cd cognee
 ```
 In case you are working on Vector and Graph Adapters
-1. Fork the [**cognee**](https://github.com/topoteretes/cognee-community) repository
+1. Fork the [**cognee-community**](https://github.com/topoteretes/cognee-community) repository
 2. Clone your fork:
 ```shell
 git clone https://github.com/<your-github-username>/cognee-community.git
@ -120,6 +120,21 @@ or
 uv run python examples/python/simple_example.py
 ```
 ### Running Simple Example
 Rename `.env.example` to `.env` and set `LLM_API_KEY` to your OpenAI API key.
 Make sure to run `uv sync` in the root cloned folder, or set up a virtual environment, before running cognee.
 ```shell
 python cognee/cognee/examples/python/simple_example.py
 ```
 or
 ```shell
 uv run python cognee/cognee/examples/python/simple_example.py
 ```
 ## 4. 📤 Submitting Changes
 1. Make sure that `pre-commit` and hooks are installed. See `Required tools` section for more information. Try executing `pre-commit run` if you are not sure.
--- a/README.md
+++ b/README.md
@ -126,6 +126,7 @@ Now, run a minimal pipeline:
 ```python
 import cognee
 import asyncio
 from pprint import pprint
 async def main():
@ -143,7 +144,7 @@ async def main():
    # Display the results
    for result in results:
-        print(result)
+        pprint(result)
 if __name__ == '__main__':
--- a/cognee-frontend/package-lock.json
+++ b/cognee-frontend/package-lock.json
--- a/cognee-frontend/package.json
+++ b/cognee-frontend/package.json
@ -13,7 +13,7 @@
    "classnames": "^2.5.1",
    "culori": "^4.0.1",
    "d3-force-3d": "^3.0.6",
-    "next": "^16.1.7",
+    "next": "^16.1.1",
    "react": "^19.2.3",
    "react-dom": "^19.2.3",
    "react-force-graph-2d": "^1.27.1",
--- a/cognee-mcp/src/cognee_client.py
+++ b/cognee-mcp/src/cognee_client.py
@ -151,7 +151,7 @@ class CogneeClient:
        query_type: str,
        datasets: Optional[List[str]] = None,
        system_prompt: Optional[str] = None,
-        top_k: int = 10,
+        top_k: int = 5,
    ) -> Any:
        """
        Search the knowledge graph.
@ -192,7 +192,7 @@ class CogneeClient:
            with redirect_stdout(sys.stderr):
                results = await self.cognee.search(
-                    query_type=SearchType[query_type.upper()], query_text=query_text
+                    query_type=SearchType[query_type.upper()], query_text=query_text, top_k=top_k
                )
                return results
--- a/cognee-mcp/src/server.py
+++ b/cognee-mcp/src/server.py
@ -316,7 +316,7 @@ async def save_interaction(data: str) -> list:
@mcp.tool()
-async def search(search_query: str, search_type: str) -> list:
+async def search(search_query: str, search_type: str, top_k: int = 10) -> list:
    """
    Search and query the knowledge graph for insights, information, and connections.
@ -389,6 +389,13 @@ async def search(search_query: str, search_type: str) -> list:
        The search_type is case-insensitive and will be converted to uppercase.
    top_k : int, optional
        Maximum number of results to return (default: 10).
        Controls the amount of context retrieved from the knowledge graph.
        - Lower values (3-5): Faster, more focused results
        - Higher values (10-20): More comprehensive, but slower and more context-heavy
        Helps manage response size and context window usage in MCP clients.
    Returns
    -------
    list
@ -425,13 +432,32 @@ async def search(search_query: str, search_type: str) -> list:
    """
-    async def search_task(search_query: str, search_type: str) -> str:
+    async def search_task(search_query: str, search_type: str, top_k: int) -> str:
-        """Search the knowledge graph"""
+        """
        Internal task to execute knowledge graph search with result formatting.
        Handles the actual search execution and formats results appropriately
        for MCP clients based on the search type and execution mode (API vs direct).
        Parameters
        ----------
        search_query : str
            The search query in natural language
        search_type : str
            Type of search to perform (GRAPH_COMPLETION, CHUNKS, etc.)
        top_k : int
            Maximum number of results to return
        Returns
        -------
        str
            Formatted search results as a string, with format depending on search_type
        """
        # NOTE: MCP uses stdout to communicate, we must redirect all output
        #       going to stdout ( like the print function ) to stderr.
        with redirect_stdout(sys.stderr):
            search_results = await cognee_client.search(
-                query_text=search_query, query_type=search_type
+                query_text=search_query, query_type=search_type, top_k=top_k
            )
            # Handle different result formats based on API vs direct mode
@ -465,7 +491,7 @@ async def search(search_query: str, search_type: str) -> list:
                else:
                    return str(search_results)
-    search_results = await search_task(search_query, search_type)
+    search_results = await search_task(search_query, search_type, top_k)
    return [types.TextContent(type="text", text=search_results)]
--- a/cognee/api/v1/search/search.py
+++ b/cognee/api/v1/search/search.py
@ -36,6 +36,7 @@ async def search(
    session_id: Optional[str] = None,
    wide_search_top_k: Optional[int] = 100,
    triplet_distance_penalty: Optional[float] = 3.5,
    verbose: bool = False,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
    """
    Search and query the knowledge graph for insights, information, and connections.
@ -126,6 +127,8 @@ async def search(
        session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.
        verbose: If True, returns detailed result information including graph representation (when possible).
    Returns:
        list: Search results in format determined by query_type:
@ -218,6 +221,7 @@ async def search(
        session_id=session_id,
        wide_search_top_k=wide_search_top_k,
        triplet_distance_penalty=triplet_distance_penalty,
        verbose=verbose,
    )
    return filtered_search_results
--- a/cognee/modules/engine/operations/setup.py
+++ b/cognee/modules/engine/operations/setup.py
@ -15,3 +15,9 @@ async def setup():
    """
    await create_relational_db_and_tables()
    await create_pgvector_db_and_tables()
 if __name__ == "__main__":
    import asyncio
    asyncio.run(setup())
--- a/cognee/modules/search/methods/search.py
+++ b/cognee/modules/search/methods/search.py
@ -49,6 +49,7 @@ async def search(
    session_id: Optional[str] = None,
    wide_search_top_k: Optional[int] = 100,
    triplet_distance_penalty: Optional[float] = 3.5,
    verbose: bool = False,
 ) -> Union[CombinedSearchResult, List[SearchResult]]:
    """
@ -140,6 +141,7 @@ async def search(
    )
    if use_combined_context:
        # Note: combined context search must always be verbose and return a CombinedSearchResult with graphs info
        prepared_search_results = await prepare_search_result(
            search_results[0] if isinstance(search_results, list) else search_results
        )
@ -173,25 +175,30 @@ async def search(
                datasets = prepared_search_results["datasets"]
                if only_context:
-                    return_value.append(
+                    search_result_dict = {
-                        {
+                        "search_result": [context] if context else None,
-                            "search_result": [context] if context else None,
+                        "dataset_id": datasets[0].id,
-                            "dataset_id": datasets[0].id,
+                        "dataset_name": datasets[0].name,
-                            "dataset_name": datasets[0].name,
+                        "dataset_tenant_id": datasets[0].tenant_id,
-                            "dataset_tenant_id": datasets[0].tenant_id,
+                    }
-                            "graphs": graphs,
+                    if verbose:
-                        }
+                        # Include graphs only in verbose mode
-                    )
+                        search_result_dict["graphs"] = graphs
                    return_value.append(search_result_dict)
                else:
-                    return_value.append(
+                    search_result_dict = {
-                        {
+                        "search_result": [result] if result else None,
-                            "search_result": [result] if result else None,
+                        "dataset_id": datasets[0].id,
-                            "dataset_id": datasets[0].id,
+                        "dataset_name": datasets[0].name,
-                            "dataset_name": datasets[0].name,
+                        "dataset_tenant_id": datasets[0].tenant_id,
-                            "dataset_tenant_id": datasets[0].tenant_id,
+                    }
-                            "graphs": graphs,
+                    if verbose:
-                        }
+                        # Include graphs only in verbose mode
-                    )
+                        search_result_dict["graphs"] = graphs
                    return_value.append(search_result_dict)
            return return_value
        else:
            return_value = []
--- a/cognee/modules/visualization/cognee_network_visualization.py
+++ b/cognee/modules/visualization/cognee_network_visualization.py
@ -92,7 +92,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
        }
        links_list.append(link_data)
-    html_template = """
+    html_template = r"""
    <!DOCTYPE html>
    <html>
    <head>
--- a/cognee/tests/unit/modules/search/test_search.py
+++ b/cognee/tests/unit/modules/search/test_search.py
@ -129,14 +129,32 @@ async def test_search_access_control_returns_dataset_shaped_dicts(monkeypatch, s
    monkeypatch.setattr(search_mod, "backend_access_control_enabled", lambda: True)
    monkeypatch.setattr(search_mod, "authorized_search", dummy_authorized_search)
-    out = await search_mod.search(
+    out_non_verbose = await search_mod.search(
        query_text="q",
        query_type=SearchType.CHUNKS,
        dataset_ids=[ds.id],
        user=user,
        verbose=False,
    )
-    assert out == [
+    assert out_non_verbose == [
        {
            "search_result": ["r"],
            "dataset_id": ds.id,
            "dataset_name": "ds1",
            "dataset_tenant_id": "t1",
        }
    ]
    out_verbose = await search_mod.search(
        query_text="q",
        query_type=SearchType.CHUNKS,
        dataset_ids=[ds.id],
        user=user,
        verbose=True,
    )
    assert out_verbose == [
        {
            "search_result": ["r"],
            "dataset_id": ds.id,
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -20,19 +20,29 @@ echo "HTTP port: $HTTP_PORT"
 # smooth redeployments and container restarts while maintaining data integrity.
 echo "Running database migrations..."
 set +e # Disable exit on error to handle specific migration errors
 MIGRATION_OUTPUT=$(alembic upgrade head)
 MIGRATION_EXIT_CODE=$?
 set -e
 if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then
    if [[ "$MIGRATION_OUTPUT" == *"UserAlreadyExists"* ]] || [[ "$MIGRATION_OUTPUT" == *"User default_user@example.com already exists"* ]]; then
        echo "Warning: Default user already exists, continuing startup..."
    else
-        echo "Migration failed with unexpected error."
+        echo "Migration failed with unexpected error. Trying to run Cognee without migrations."
        exit 1
    fi
 fi
-echo "Database migrations done."
+        echo "Initializing database tables..."
        python /app/cognee/modules/engine/operations/setup.py
        INIT_EXIT_CODE=$?
        if [[ $INIT_EXIT_CODE -ne 0 ]]; then
            echo "Database initialization failed!"
            exit 1
        fi
    fi
 else
    echo "Database migrations done."
 fi
 echo "Starting server..."
--- a/examples/python/cognee_simple_document_demo.py
+++ b/examples/python/cognee_simple_document_demo.py
@ -1,8 +1,9 @@
 import asyncio
 import cognee
 import os
 from pprint import pprint
 # By default cognee uses OpenAI's gpt-5-mini LLM model
 # Provide your OpenAI LLM API KEY
 os.environ["LLM_API_KEY"] = ""
@ -24,13 +25,13 @@ async def cognee_demo():
    # Query Cognee for information from provided document
    answer = await cognee.search("List me all the important characters in Alice in Wonderland.")
-    print(answer)
+    pprint(answer)
    answer = await cognee.search("How did Alice end up in Wonderland?")
-    print(answer)
+    pprint(answer)
    answer = await cognee.search("Tell me about Alice's personality.")
-    print(answer)
+    pprint(answer)
 # Cognee is an async library, it has to be called in an async context
--- a/examples/python/dynamic_steps_example.py
+++ b/examples/python/dynamic_steps_example.py
@ -1,4 +1,5 @@
 import asyncio
 from pprint import pprint
 import cognee
 from cognee.api.v1.search import SearchType
@ -187,7 +188,7 @@ async def main(enable_steps):
        search_results = await cognee.search(
            query_type=SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"
        )
-        print(search_results)
+        pprint(search_results)
 if __name__ == "__main__":
--- a/examples/python/multimedia_example.py
+++ b/examples/python/multimedia_example.py
@ -1,6 +1,8 @@
 import os
 import asyncio
 import pathlib
 from pprint import pprint
 from cognee.shared.logging_utils import setup_logging, ERROR
 import cognee
@ -42,7 +44,7 @@ async def main():
    # Display search results
    for result_text in search_results:
-        print(result_text)
+        pprint(result_text)
 if __name__ == "__main__":
--- a/examples/python/ontology_demo_example.py
+++ b/examples/python/ontology_demo_example.py
@ -1,5 +1,6 @@
 import asyncio
 import os
 from pprint import pprint
 import cognee
 from cognee.api.v1.search import SearchType
@ -77,7 +78,7 @@ async def main():
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What are the exact cars and their types produced by Audi?",
    )
-    print(search_results)
+    pprint(search_results)
    await visualize_graph()
--- a/examples/python/permissions_example.py
+++ b/examples/python/permissions_example.py
@ -1,6 +1,7 @@
 import os
 import cognee
 import pathlib
 from pprint import pprint
 from cognee.modules.users.exceptions import PermissionDeniedError
 from cognee.modules.users.tenants.methods import select_tenant
@ -86,7 +87,7 @@ async def main():
    )
    print("\nSearch results as user_1 on dataset owned by user_1:")
    for result in search_results:
-        print(f"{result}\n")
+        pprint(result)
    # But user_1 can't read the dataset owned by user_2 (QUANTUM dataset)
    print("\nSearch result as user_1 on the dataset owned by user_2:")
@ -134,7 +135,7 @@ async def main():
        dataset_ids=[quantum_dataset_id],
    )
    for result in search_results:
-        print(f"{result}\n")
+        pprint(result)
    # If we'd like for user_1 to add new documents to the QUANTUM dataset owned by user_2, user_1 would have to get
    # "write" access permission, which user_1 currently does not have
@ -217,7 +218,7 @@ async def main():
        dataset_ids=[quantum_cognee_lab_dataset_id],
    )
    for result in search_results:
-        print(f"{result}\n")
+        pprint(result)
    # Note: All of these function calls and the permission system are available through our backend endpoints as well
--- a/examples/python/run_custom_pipeline_example.py
+++ b/examples/python/run_custom_pipeline_example.py
@ -1,4 +1,6 @@
 import asyncio
 from pprint import pprint
 import cognee
 from cognee.modules.engine.operations.setup import setup
 from cognee.modules.users.methods import get_default_user
@ -71,7 +73,7 @@ async def main():
    print("Search results:")
    # Display results
    for result_text in search_results:
-        print(result_text)
+        pprint(result_text)
 if __name__ == "__main__":
--- a/examples/python/simple_example.py
+++ b/examples/python/simple_example.py
@ -1,4 +1,6 @@
 import asyncio
 from pprint import pprint
 import cognee
 from cognee.shared.logging_utils import setup_logging, ERROR
 from cognee.api.v1.search import SearchType
@ -54,7 +56,7 @@ async def main():
    print("Search results:")
    # Display results
    for result_text in search_results:
-        print(result_text)
+        pprint(result_text)
 if __name__ == "__main__":
--- a/examples/python/temporal_example.py
+++ b/examples/python/temporal_example.py
@ -1,4 +1,5 @@
 import asyncio
 from pprint import pprint
 import cognee
 from cognee.shared.logging_utils import setup_logging, INFO
 from cognee.api.v1.search import SearchType
@ -87,7 +88,8 @@ async def main():
            top_k=15,
        )
        print(f"Query: {query_text}")
-        print(f"Results: {search_results}\n")
+        print("Results:")
        pprint(search_results)
 if __name__ == "__main__":
--- a/examples/python/triplet_embeddings_example.py
+++ b/examples/python/triplet_embeddings_example.py
@ -1,4 +1,5 @@
 import asyncio
 from pprint import pprint
 import cognee
 from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
@ -65,7 +66,7 @@ async def main():
        query_type=SearchType.TRIPLET_COMPLETION,
        query_text="What are the models produced by Volkswagen based on the context?",
    )
-    print(search_results)
+    pprint(search_results)
 if __name__ == "__main__":
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,7 +1,7 @@
 [project]
 name = "cognee"
-version = "0.5.1.dev0"
+version = "0.5.1"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = [
    { name = "Vasilije Markovic" },
--- a/uv.lock
+++ b/uv.lock