Merge branch 'main' into main-merge-vol9

2026-01-13 14:22:22 +01:00 · 2026-01-13 14:22:22 +01:00 · b689d330ac
commit b689d330ac
parent ab990f7c5c 8a96a351e2
23 changed files with 5270 additions and 7068 deletions
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -76,7 +76,7 @@ git clone https://github.com/<your-github-username>/cognee.git
 cd cognee
 ```
 In case you are working on Vector and Graph Adapters
-1. Fork the [**cognee**](https://github.com/topoteretes/cognee-community) repository
+1. Fork the [**cognee-community**](https://github.com/topoteretes/cognee-community) repository
 2. Clone your fork:
 ```shell
 git clone https://github.com/<your-github-username>/cognee-community.git
@ -120,6 +120,21 @@ or
 uv run python examples/python/simple_example.py
 ```

+### Running Simple Example
+
+Rename `.env.example` to `.env` and set `LLM_API_KEY` in it to your OpenAI API key.
+
+Make sure to run `uv sync` in the root of the cloned repository, or set up a virtual environment, before running cognee.
+
+```shell
+python cognee/cognee/examples/python/simple_example.py
+```
+or
+
+```shell
+uv run python cognee/cognee/examples/python/simple_example.py
+```
+
 ## 4. 📤 Submitting Changes

 1. Make sure that `pre-commit` and hooks are installed. See `Required tools` section for more information. Try executing `pre-commit run` if you are not sure.
--- a/README.md
+++ b/README.md
@ -126,6 +126,7 @@ Now, run a minimal pipeline:
 ```python
 import cognee
 import asyncio
+from pprint import pprint


 async def main():
@ -143,7 +144,7 @@ async def main():

    # Display the results
    for result in results:
-        print(result)
+        pprint(result)


 if __name__ == '__main__':
--- a/cognee-frontend/package-lock.json
+++ b/cognee-frontend/package-lock.json
--- a/cognee-frontend/package.json
+++ b/cognee-frontend/package.json
@ -13,7 +13,7 @@
    "classnames": "^2.5.1",
    "culori": "^4.0.1",
    "d3-force-3d": "^3.0.6",
-    "next": "^16.1.7",
+    "next": "^16.1.1",
    "react": "^19.2.3",
    "react-dom": "^19.2.3",
    "react-force-graph-2d": "^1.27.1",
--- a/cognee-mcp/src/cognee_client.py
+++ b/cognee-mcp/src/cognee_client.py
@ -151,7 +151,7 @@ class CogneeClient:
        query_type: str,
        datasets: Optional[List[str]] = None,
        system_prompt: Optional[str] = None,
-        top_k: int = 10,
+        top_k: int = 5,
    ) -> Any:
        """
        Search the knowledge graph.
@ -192,7 +192,7 @@ class CogneeClient:

            with redirect_stdout(sys.stderr):
                results = await self.cognee.search(
-                    query_type=SearchType[query_type.upper()], query_text=query_text
+                    query_type=SearchType[query_type.upper()], query_text=query_text, top_k=top_k
                )
                return results

--- a/cognee-mcp/src/server.py
+++ b/cognee-mcp/src/server.py
@ -316,7 +316,7 @@ async def save_interaction(data: str) -> list:


@mcp.tool()
-async def search(search_query: str, search_type: str) -> list:
+async def search(search_query: str, search_type: str, top_k: int = 10) -> list:
    """
    Search and query the knowledge graph for insights, information, and connections.

@ -389,6 +389,13 @@ async def search(search_query: str, search_type: str) -> list:

        The search_type is case-insensitive and will be converted to uppercase.

+    top_k : int, optional
+        Maximum number of results to return (default: 10).
+        Controls the amount of context retrieved from the knowledge graph.
+        - Lower values (3-5): Faster, more focused results
+        - Higher values (10-20): More comprehensive, but slower and more context-heavy
+        Helps manage response size and context window usage in MCP clients.
+
    Returns
    -------
    list
@ -425,13 +432,32 @@ async def search(search_query: str, search_type: str) -> list:

    """

-    async def search_task(search_query: str, search_type: str) -> str:
-        """Search the knowledge graph"""
+    async def search_task(search_query: str, search_type: str, top_k: int) -> str:
+        """
+        Internal task to execute knowledge graph search with result formatting.
+
+        Handles the actual search execution and formats results appropriately
+        for MCP clients based on the search type and execution mode (API vs direct).
+
+        Parameters
+        ----------
+        search_query : str
+            The search query in natural language
+        search_type : str
+            Type of search to perform (GRAPH_COMPLETION, CHUNKS, etc.)
+        top_k : int
+            Maximum number of results to return
+
+        Returns
+        -------
+        str
+            Formatted search results as a string, with format depending on search_type
+        """
        # NOTE: MCP uses stdout to communicate, we must redirect all output
        #       going to stdout ( like the print function ) to stderr.
        with redirect_stdout(sys.stderr):
            search_results = await cognee_client.search(
-                query_text=search_query, query_type=search_type
+                query_text=search_query, query_type=search_type, top_k=top_k
            )

            # Handle different result formats based on API vs direct mode
@ -465,7 +491,7 @@ async def search(search_query: str, search_type: str) -> list:
                else:
                    return str(search_results)

-    search_results = await search_task(search_query, search_type)
+    search_results = await search_task(search_query, search_type, top_k)
    return [types.TextContent(type="text", text=search_results)]


--- a/cognee/api/v1/search/search.py
+++ b/cognee/api/v1/search/search.py
@ -36,6 +36,7 @@ async def search(
    session_id: Optional[str] = None,
    wide_search_top_k: Optional[int] = 100,
    triplet_distance_penalty: Optional[float] = 3.5,
+    verbose: bool = False,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
    """
    Search and query the knowledge graph for insights, information, and connections.
@ -126,6 +127,8 @@ async def search(

        session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.

+        verbose: If True, returns detailed result information including graph representation (when possible).
+
    Returns:
        list: Search results in format determined by query_type:

@ -218,6 +221,7 @@ async def search(
        session_id=session_id,
        wide_search_top_k=wide_search_top_k,
        triplet_distance_penalty=triplet_distance_penalty,
+        verbose=verbose,
    )

    return filtered_search_results
--- a/cognee/modules/engine/operations/setup.py
+++ b/cognee/modules/engine/operations/setup.py
@ -15,3 +15,9 @@ async def setup():
    """
    await create_relational_db_and_tables()
    await create_pgvector_db_and_tables()
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(setup())
--- a/cognee/modules/search/methods/search.py
+++ b/cognee/modules/search/methods/search.py
@ -49,6 +49,7 @@ async def search(
    session_id: Optional[str] = None,
    wide_search_top_k: Optional[int] = 100,
    triplet_distance_penalty: Optional[float] = 3.5,
+    verbose: bool = False,
 ) -> Union[CombinedSearchResult, List[SearchResult]]:
    """

@ -140,6 +141,7 @@ async def search(
    )

    if use_combined_context:
+        # Note: combined context search must always be verbose and return a CombinedSearchResult with graphs info
        prepared_search_results = await prepare_search_result(
            search_results[0] if isinstance(search_results, list) else search_results
        )
@ -173,25 +175,30 @@ async def search(
                datasets = prepared_search_results["datasets"]

                if only_context:
-                    return_value.append(
-                        {
-                            "search_result": [context] if context else None,
-                            "dataset_id": datasets[0].id,
-                            "dataset_name": datasets[0].name,
-                            "dataset_tenant_id": datasets[0].tenant_id,
-                            "graphs": graphs,
-                        }
-                    )
+                    search_result_dict = {
+                        "search_result": [context] if context else None,
+                        "dataset_id": datasets[0].id,
+                        "dataset_name": datasets[0].name,
+                        "dataset_tenant_id": datasets[0].tenant_id,
+                    }
+                    if verbose:
+                        # Include graphs only in verbose mode
+                        search_result_dict["graphs"] = graphs
+
+                    return_value.append(search_result_dict)
                else:
-                    return_value.append(
-                        {
-                            "search_result": [result] if result else None,
-                            "dataset_id": datasets[0].id,
-                            "dataset_name": datasets[0].name,
-                            "dataset_tenant_id": datasets[0].tenant_id,
-                            "graphs": graphs,
-                        }
-                    )
+                    search_result_dict = {
+                        "search_result": [result] if result else None,
+                        "dataset_id": datasets[0].id,
+                        "dataset_name": datasets[0].name,
+                        "dataset_tenant_id": datasets[0].tenant_id,
+                    }
+                    if verbose:
+                        # Include graphs only in verbose mode
+                        search_result_dict["graphs"] = graphs
+
+                    return_value.append(search_result_dict)
+
            return return_value
        else:
            return_value = []
--- a/cognee/modules/visualization/cognee_network_visualization.py
+++ b/cognee/modules/visualization/cognee_network_visualization.py
@ -92,7 +92,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
        }
        links_list.append(link_data)

-    html_template = """
+    html_template = r"""
    <!DOCTYPE html>
    <html>
    <head>
--- a/cognee/tests/unit/modules/search/test_search.py
+++ b/cognee/tests/unit/modules/search/test_search.py
@ -129,14 +129,32 @@ async def test_search_access_control_returns_dataset_shaped_dicts(monkeypatch, s
    monkeypatch.setattr(search_mod, "backend_access_control_enabled", lambda: True)
    monkeypatch.setattr(search_mod, "authorized_search", dummy_authorized_search)

-    out = await search_mod.search(
+    out_non_verbose = await search_mod.search(
        query_text="q",
        query_type=SearchType.CHUNKS,
        dataset_ids=[ds.id],
        user=user,
+        verbose=False,
    )

-    assert out == [
+    assert out_non_verbose == [
+        {
+            "search_result": ["r"],
+            "dataset_id": ds.id,
+            "dataset_name": "ds1",
+            "dataset_tenant_id": "t1",
+        }
+    ]
+
+    out_verbose = await search_mod.search(
+        query_text="q",
+        query_type=SearchType.CHUNKS,
+        dataset_ids=[ds.id],
+        user=user,
+        verbose=True,
+    )
+
+    assert out_verbose == [
        {
            "search_result": ["r"],
            "dataset_id": ds.id,
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -20,19 +20,29 @@ echo "HTTP port: $HTTP_PORT"
 # smooth redeployments and container restarts while maintaining data integrity.
 echo "Running database migrations..."

+set +e # Disable exit on error to handle specific migration errors
 MIGRATION_OUTPUT=$(alembic upgrade head)
 MIGRATION_EXIT_CODE=$?
+set -e

 if [[ $MIGRATION_EXIT_CODE -ne 0 ]]; then
    if [[ "$MIGRATION_OUTPUT" == *"UserAlreadyExists"* ]] || [[ "$MIGRATION_OUTPUT" == *"User default_user@example.com already exists"* ]]; then
        echo "Warning: Default user already exists, continuing startup..."
    else
-        echo "Migration failed with unexpected error."
-        exit 1
-    fi
-fi
+        echo "Migration failed with unexpected error. Trying to run Cognee without migrations."

-echo "Database migrations done."
+        echo "Initializing database tables..."
+        python /app/cognee/modules/engine/operations/setup.py
+        INIT_EXIT_CODE=$?
+
+        if [[ $INIT_EXIT_CODE -ne 0 ]]; then
+            echo "Database initialization failed!"
+            exit 1
+        fi
+    fi
+else
+    echo "Database migrations done."
+fi

 echo "Starting server..."

--- a/examples/python/cognee_simple_document_demo.py
+++ b/examples/python/cognee_simple_document_demo.py
@ -1,8 +1,9 @@
 import asyncio
 import cognee
-
 import os

+from pprint import pprint
+
 # By default cognee uses OpenAI's gpt-5-mini LLM model
 # Provide your OpenAI LLM API KEY
 os.environ["LLM_API_KEY"] = ""
@ -24,13 +25,13 @@ async def cognee_demo():

    # Query Cognee for information from provided document
    answer = await cognee.search("List me all the important characters in Alice in Wonderland.")
-    print(answer)
+    pprint(answer)

    answer = await cognee.search("How did Alice end up in Wonderland?")
-    print(answer)
+    pprint(answer)

    answer = await cognee.search("Tell me about Alice's personality.")
-    print(answer)
+    pprint(answer)


 # Cognee is an async library, it has to be called in an async context
--- a/examples/python/dynamic_steps_example.py
+++ b/examples/python/dynamic_steps_example.py
@ -1,4 +1,5 @@
 import asyncio
+from pprint import pprint

 import cognee
 from cognee.api.v1.search import SearchType
@ -187,7 +188,7 @@ async def main(enable_steps):
        search_results = await cognee.search(
            query_type=SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"
        )
-        print(search_results)
+        pprint(search_results)


 if __name__ == "__main__":
--- a/examples/python/multimedia_example.py
+++ b/examples/python/multimedia_example.py
@ -1,6 +1,8 @@
 import os
 import asyncio
 import pathlib
+from pprint import pprint
+
 from cognee.shared.logging_utils import setup_logging, ERROR

 import cognee
@ -42,7 +44,7 @@ async def main():

    # Display search results
    for result_text in search_results:
-        print(result_text)
+        pprint(result_text)


 if __name__ == "__main__":
--- a/examples/python/ontology_demo_example.py
+++ b/examples/python/ontology_demo_example.py
@ -1,5 +1,6 @@
 import asyncio
 import os
+from pprint import pprint

 import cognee
 from cognee.api.v1.search import SearchType
@ -77,7 +78,7 @@ async def main():
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What are the exact cars and their types produced by Audi?",
    )
-    print(search_results)
+    pprint(search_results)

    await visualize_graph()

--- a/examples/python/permissions_example.py
+++ b/examples/python/permissions_example.py
@ -1,6 +1,7 @@
 import os
 import cognee
 import pathlib
+from pprint import pprint

 from cognee.modules.users.exceptions import PermissionDeniedError
 from cognee.modules.users.tenants.methods import select_tenant
@ -86,7 +87,7 @@ async def main():
    )
    print("\nSearch results as user_1 on dataset owned by user_1:")
    for result in search_results:
-        print(f"{result}\n")
+        pprint(result)

    # But user_1 cant read the dataset owned by user_2 (QUANTUM dataset)
    print("\nSearch result as user_1 on the dataset owned by user_2:")
@ -134,7 +135,7 @@ async def main():
        dataset_ids=[quantum_dataset_id],
    )
    for result in search_results:
-        print(f"{result}\n")
+        pprint(result)

    # If we'd like for user_1 to add new documents to the QUANTUM dataset owned by user_2, user_1 would have to get
    # "write" access permission, which user_1 currently does not have
@ -217,7 +218,7 @@ async def main():
        dataset_ids=[quantum_cognee_lab_dataset_id],
    )
    for result in search_results:
-        print(f"{result}\n")
+        pprint(result)

    # Note: All of these function calls and permission system is available through our backend endpoints as well

--- a/examples/python/run_custom_pipeline_example.py
+++ b/examples/python/run_custom_pipeline_example.py
@ -1,4 +1,6 @@
 import asyncio
+from pprint import pprint
+
 import cognee
 from cognee.modules.engine.operations.setup import setup
 from cognee.modules.users.methods import get_default_user
@ -71,7 +73,7 @@ async def main():
    print("Search results:")
    # Display results
    for result_text in search_results:
-        print(result_text)
+        pprint(result_text)


 if __name__ == "__main__":
--- a/examples/python/simple_example.py
+++ b/examples/python/simple_example.py
@ -1,4 +1,6 @@
 import asyncio
+from pprint import pprint
+
 import cognee
 from cognee.shared.logging_utils import setup_logging, ERROR
 from cognee.api.v1.search import SearchType
@ -54,7 +56,7 @@ async def main():
    print("Search results:")
    # Display results
    for result_text in search_results:
-        print(result_text)
+        pprint(result_text)


 if __name__ == "__main__":
--- a/examples/python/temporal_example.py
+++ b/examples/python/temporal_example.py
@ -1,4 +1,5 @@
 import asyncio
+from pprint import pprint
 import cognee
 from cognee.shared.logging_utils import setup_logging, INFO
 from cognee.api.v1.search import SearchType
@ -87,7 +88,8 @@ async def main():
            top_k=15,
        )
        print(f"Query: {query_text}")
-        print(f"Results: {search_results}\n")
+        print("Results:")
+        pprint(search_results)


 if __name__ == "__main__":
--- a/examples/python/triplet_embeddings_example.py
+++ b/examples/python/triplet_embeddings_example.py
@ -1,4 +1,5 @@
 import asyncio
+from pprint import pprint

 import cognee
 from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
@ -65,7 +66,7 @@ async def main():
        query_type=SearchType.TRIPLET_COMPLETION,
        query_text="What are the models produced by Volkswagen based on the context?",
    )
-    print(search_results)
+    pprint(search_results)


 if __name__ == "__main__":
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,7 +1,7 @@
 [project]
 name = "cognee"

-version = "0.5.1.dev0"
+version = "0.5.1"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = [
    { name = "Vasilije Markovic" },
--- a/uv.lock
+++ b/uv.lock