merge done

This commit is contained in:
vasilije 2025-05-19 13:22:52 +02:00
commit a5b28983bd
9 changed files with 18 additions and 225 deletions

View file

@@ -97,7 +97,7 @@ git push origin feature/your-feature-name
2. Create a Pull Request:
- Go to the [**cognee** repository](https://github.com/topoteretes/cognee)
- Click "Compare & Pull Request" and open a PR against dev branch
- Click "Compare & Pull Request"
- Fill in the PR template with details about your changes
## 5. 📜 Developer Certificate of Origin (DCO)

View file

@@ -8,7 +8,7 @@ requires-python = ">=3.10"
dependencies = [
# For local cognee repo usage remove comment below and add absolute path to cognee
#"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
"cognee[postgres,codegraph,gemini,huggingface,docs]==0.1.40",
"cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
"fastmcp>=1.0",
"mcp==1.5.0",
"uv>=0.6.3",

View file

@@ -24,46 +24,9 @@ log_file = get_log_file_location()
@mcp.tool()
async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
"""
Transform data into a structured knowledge graph in Cognee's memory layer.
This function launches a background task that processes the provided text/file location and
generates a knowledge graph representation. The function returns immediately while
the processing continues in the background due to MCP timeout constraints.
Parameters
----------
data : str
The data to be processed and transformed into structured knowledge.
This can include natural language, file location, or any text-based information
that should become part of the agent's memory.
graph_model_file : str, optional
Path to a custom schema file that defines the structure of the generated knowledge graph.
If provided, this file will be loaded using importlib to create a custom graph model.
Default is None, which uses Cognee's built-in KnowledgeGraph model.
graph_model_name : str, optional
Name of the class within the graph_model_file to instantiate as the graph model.
Required if graph_model_file is specified.
Default is None, which uses the default KnowledgeGraph class.
Returns
-------
list
A list containing a single TextContent object with information about the
background task launch and how to check its status.
Notes
-----
- The function launches a background task and returns immediately
- The actual cognify process may take significant time depending on text length
- Use the cognify_status tool to check the progress of the operation
"""
async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
async def cognify_task(
data: str, graph_model_file: str = None, graph_model_name: str = None
text: str, graph_model_file: str = None, graph_model_name: str = None
) -> str:
"""Build knowledge graph from the input text"""
# NOTE: MCP uses stdout to communicate, we must redirect all output
@@ -75,7 +38,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
else:
graph_model = KnowledgeGraph
await cognee.add(data)
await cognee.add(text)
try:
await cognee.cognify(graph_model=graph_model)
@@ -86,7 +49,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
asyncio.create_task(
cognify_task(
data=data,
text=text,
graph_model_file=graph_model_file,
graph_model_name=graph_model_name,
)
@@ -108,35 +71,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
@mcp.tool()
async def codify(repo_path: str) -> list:
"""
Analyze and generate a code-specific knowledge graph from a software repository.
This function launches a background task that processes the provided repository
and builds a code knowledge graph. The function returns immediately while
the processing continues in the background due to MCP timeout constraints.
Parameters
----------
repo_path : str
Path to the code repository to analyze. This can be a local file path or a
relative path to a repository. The path should point to the root of the
repository or a specific directory within it.
Returns
-------
list
A list containing a single TextContent object with information about the
background task launch and how to check its status.
Notes
-----
- The function launches a background task and returns immediately
- The code graph generation may take significant time for larger repositories
- Use the codify_status tool to check the progress of the operation
- Process results are logged to the standard Cognee log file
- All stdout is redirected to stderr to maintain MCP communication integrity
"""
async def codify_task(repo_path: str):
# NOTE: MCP uses stdout to communicate, we must redirect all output
# going to stdout ( like the print function ) to stderr.
@@ -169,46 +103,6 @@ async def codify(repo_path: str) -> list:
@mcp.tool()
async def search(search_query: str, search_type: str) -> list:
"""
Search the Cognee knowledge graph for information relevant to the query.
This function executes a search against the Cognee knowledge graph using the
specified query and search type. It returns formatted results based on the
search type selected.
Parameters
----------
search_query : str
The search query in natural language. This can be a question, instruction, or
any text that expresses what information is needed from the knowledge graph.
search_type : str
The type of search to perform. Valid options include:
- "GRAPH_COMPLETION": Returns an LLM response based on the search query and Cognee's memory
- "RAG_COMPLETION": Returns an LLM response based on the search query and standard RAG data
- "CODE": Returns code-related knowledge in JSON format
- "CHUNKS": Returns raw text chunks from the knowledge graph
- "INSIGHTS": Returns relationships between nodes in readable format
The search_type is case-insensitive and will be converted to uppercase.
Returns
-------
list
A list containing a single TextContent object with the search results.
The format of the result depends on the search_type:
- For CODE: JSON-formatted search results
- For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion
- For CHUNKS: String representation of the raw chunks
- For INSIGHTS: Formatted string showing node relationships
- For other types: String representation of the search results
Notes
-----
- Different search types produce different output formats
- The function handles the conversion between Cognee's internal result format and MCP's output format
"""
async def search_task(search_query: str, search_type: str) -> str:
"""Search the knowledge graph"""
# NOTE: MCP uses stdout to communicate, we must redirect all output
@@ -238,24 +132,7 @@ async def search(search_query: str, search_type: str) -> list:
@mcp.tool()
async def prune():
"""
Reset the Cognee knowledge graph by removing all stored information.
This function performs a complete reset of both the data layer and system layer
of the Cognee knowledge graph, removing all nodes, edges, and associated metadata.
It is typically used during development or when needing to start fresh with a new
knowledge base.
Returns
-------
list
A list containing a single TextContent object with confirmation of the prune operation.
Notes
-----
- This operation cannot be undone. All memory data will be permanently deleted.
- The function prunes both data content (using prune_data) and system metadata (using prune_system)
"""
"""Reset the knowledge graph"""
with redirect_stdout(sys.stderr):
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
@@ -264,25 +141,7 @@ async def prune():
@mcp.tool()
async def cognify_status():
"""
Get the current status of the cognify pipeline.
This function retrieves information about current and recently completed cognify operations
in the main_dataset. It provides details on progress, success/failure status, and statistics
about the processed data.
Returns
-------
list
A list containing a single TextContent object with the status information as a string.
The status includes information about active and completed jobs for the cognify_pipeline.
Notes
-----
- The function retrieves pipeline status specifically for the "cognify_pipeline" on the "main_dataset"
- Status information includes job progress, execution time, and completion status
- The status is returned in string format for easy reading
"""
"""Get status of cognify pipeline"""
with redirect_stdout(sys.stderr):
user = await get_default_user()
status = await get_pipeline_status(
@@ -293,25 +152,7 @@ async def cognify_status():
@mcp.tool()
async def codify_status():
"""
Get the current status of the codify pipeline.
This function retrieves information about current and recently completed codify operations
in the codebase dataset. It provides details on progress, success/failure status, and statistics
about the processed code repositories.
Returns
-------
list
A list containing a single TextContent object with the status information as a string.
The status includes information about active and completed jobs for the cognify_code_pipeline.
Notes
-----
- The function retrieves pipeline status specifically for the "cognify_code_pipeline" on the "codebase" dataset
- Status information includes job progress, execution time, and completion status
- The status is returned in string format for easy reading
"""
"""Get status of codify pipeline"""
with redirect_stdout(sys.stderr):
user = await get_default_user()
status = await get_pipeline_status(

View file

@@ -178,18 +178,10 @@ class MilvusAdapter(VectorDBInterface):
):
from pymilvus import MilvusException, exceptions
if limit <= 0:
return []
client = self.get_milvus_client()
if query_text is None and query_vector is None:
raise ValueError("One of query_text or query_vector must be provided!")
if not client.has_collection(collection_name=collection_name):
logger.warning(
f"Collection '{collection_name}' not found in MilvusAdapter.search; returning []."
)
return []
try:
query_vector = query_vector or (await self.embed_data([query_text]))[0]
@@ -216,19 +208,12 @@ class MilvusAdapter(VectorDBInterface):
)
for result in results[0]
]
except exceptions.CollectionNotExistException:
logger.warning(
f"Collection '{collection_name}' not found (exception) in MilvusAdapter.search; returning []."
)
return []
except exceptions.CollectionNotExistException as error:
raise CollectionNotFoundError(
f"Collection '{collection_name}' does not exist!"
) from error
except MilvusException as e:
# Catch other Milvus errors that are "collection not found" (paranoid safety)
if "collection not found" in str(e).lower() or "schema" in str(e).lower():
logger.warning(
f"Collection '{collection_name}' not found (MilvusException) in MilvusAdapter.search; returning []."
)
return []
logger.error(f"Error searching Milvus collection '{collection_name}': {e}")
logger.error(f"Error during search in collection '{collection_name}': {str(e)}")
raise e
async def batch_search(

View file

@@ -159,24 +159,12 @@ class QDrantAdapter(VectorDBInterface):
query_vector: Optional[List[float]] = None,
limit: int = 15,
with_vector: bool = False,
) -> List[ScoredResult]:
):
from qdrant_client.http.exceptions import UnexpectedResponse
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
if limit <= 0:
return []
if not await self.has_collection(collection_name):
logger.warning(
f"Collection '{collection_name}' not found in QdrantAdapter.search; returning []."
)
return []
if query_vector is None:
query_vector = (await self.embed_data([query_text]))[0]
try:
client = self.get_qdrant_client()

View file

@@ -113,7 +113,7 @@ class WeaviateAdapter(VectorDBInterface):
# )
else:
data_point: DataObject = data_points[0]
if await collection.data.exists(data_point.uuid):
if collection.data.exists(data_point.uuid):
return await collection.data.update(
uuid=data_point.uuid,
vector=data_point.vector,

View file

@@ -146,7 +146,7 @@ async def brute_force_search(
async def search_in_collection(collection_name: str):
try:
return await vector_engine.search(
collection_name=collection_name, query_text=query, limit=50
collection_name=collection_name, query_text=query, limit=0
)
except CollectionNotFoundError:
return []

View file

@@ -136,27 +136,6 @@ Repository = "https://github.com/topoteretes/cognee"
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build]
exclude = [
"/bin",
"/dist",
"/.data",
"/.github",
"/alembic",
"/distributed",
"/deployment",
"/cognee-mcp",
"/cognee-frontend",
"/examples",
"/helm",
"/licenses",
"/logs",
"/notebooks",
"/profiling",
"/tests",
"/tools",
]
[tool.ruff]
line-length = 100
exclude = [

2
uv.lock generated
View file

@@ -860,7 +860,7 @@ wheels = [
[[package]]
name = "cognee"
version = "0.1.40"
version = "0.1.39"
source = { editable = "." }
dependencies = [
{ name = "aiofiles" },