diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ed52658c1..9e97d0d23 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -97,7 +97,7 @@ git push origin feature/your-feature-name 2. Create a Pull Request: - Go to the [**cognee** repository](https://github.com/topoteretes/cognee) - - Click "Compare & Pull Request" and open a PR against dev branch + - Click "Compare & Pull Request" - Fill in the PR template with details about your changes ## 5. 📜 Developer Certificate of Origin (DCO) diff --git a/cognee-mcp/pyproject.toml b/cognee-mcp/pyproject.toml index e1bc56153..a2190dfda 100644 --- a/cognee-mcp/pyproject.toml +++ b/cognee-mcp/pyproject.toml @@ -8,7 +8,7 @@ requires-python = ">=3.10" dependencies = [ # For local cognee repo usage remove comment bellow and add absolute path to cognee #"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users//Desktop/cognee", - "cognee[postgres,codegraph,gemini,huggingface,docs]==0.1.40", + "cognee[postgres,codegraph,gemini,huggingface]==0.1.40", "fastmcp>=1.0", "mcp==1.5.0", "uv>=0.6.3", diff --git a/cognee-mcp/src/server.py b/cognee-mcp/src/server.py index 6a0ff41d0..f8c65bc7c 100755 --- a/cognee-mcp/src/server.py +++ b/cognee-mcp/src/server.py @@ -24,46 +24,9 @@ log_file = get_log_file_location() @mcp.tool() -async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list: - """ - Transform data into a structured knowledge graph in Cognee's memory layer. - - This function launches a background task that processes the provided text/file location and - generates a knowledge graph representation. The function returns immediately while - the processing continues in the background due to MCP timeout constraints. - - Parameters - ---------- - data : str - The data to be processed and transformed into structured knowledge. - This can include natural language, file location, or any text-based information - that should become part of the agent's memory. - - graph_model_file : str, optional - Path to a custom schema file that defines the structure of the generated knowledge graph. - If provided, this file will be loaded using importlib to create a custom graph model. - Default is None, which uses Cognee's built-in KnowledgeGraph model. - - graph_model_name : str, optional - Name of the class within the graph_model_file to instantiate as the graph model. - Required if graph_model_file is specified. - Default is None, which uses the default KnowledgeGraph class. - - Returns - ------- - list - A list containing a single TextContent object with information about the - background task launch and how to check its status. - - Notes - ----- - - The function launches a background task and returns immediately - - The actual cognify process may take significant time depending on text length - - Use the cognify_status tool to check the progress of the operation - """ - +async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> list: async def cognify_task( - data: str, graph_model_file: str = None, graph_model_name: str = None + text: str, graph_model_file: str = None, graph_model_name: str = None ) -> str: """Build knowledge graph from the input text""" # NOTE: MCP uses stdout to communicate, we must redirect all output @@ -75,7 +38,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str else: graph_model = KnowledgeGraph - await cognee.add(data) + await cognee.add(text) try: await cognee.cognify(graph_model=graph_model) @@ -86,7 +49,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str asyncio.create_task( cognify_task( - data=data, + text=text, graph_model_file=graph_model_file, graph_model_name=graph_model_name, ) @@ -108,35 +71,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str @mcp.tool() async def codify(repo_path: str) -> list: - """ - Analyze and generate a code-specific knowledge graph from a software repository. - - This function launches a background task that processes the provided repository - and builds a code knowledge graph. The function returns immediately while - the processing continues in the background due to MCP timeout constraints. - - Parameters - ---------- - repo_path : str - Path to the code repository to analyze. This can be a local file path or a - relative path to a repository. The path should point to the root of the - repository or a specific directory within it. - - Returns - ------- - list - A list containing a single TextContent object with information about the - background task launch and how to check its status. - - Notes - ----- - - The function launches a background task and returns immediately - - The code graph generation may take significant time for larger repositories - - Use the codify_status tool to check the progress of the operation - - Process results are logged to the standard Cognee log file - - All stdout is redirected to stderr to maintain MCP communication integrity - """ - async def codify_task(repo_path: str): # NOTE: MCP uses stdout to communicate, we must redirect all output # going to stdout ( like the print function ) to stderr. @@ -169,46 +103,6 @@ async def codify(repo_path: str) -> list: @mcp.tool() async def search(search_query: str, search_type: str) -> list: - """ - Search the Cognee knowledge graph for information relevant to the query. - - This function executes a search against the Cognee knowledge graph using the - specified query and search type. It returns formatted results based on the - search type selected. - - Parameters - ---------- - search_query : str - The search query in natural language. This can be a question, instruction, or - any text that expresses what information is needed from the knowledge graph. - - search_type : str - The type of search to perform. Valid options include: - - "GRAPH_COMPLETION": Returns an LLM response based on the search query and Cognee's memory - - "RAG_COMPLETION": Returns an LLM response based on the search query and standard RAG data - - "CODE": Returns code-related knowledge in JSON format - - "CHUNKS": Returns raw text chunks from the knowledge graph - - "INSIGHTS": Returns relationships between nodes in readable format - - The search_type is case-insensitive and will be converted to uppercase. - - Returns - ------- - list - A list containing a single TextContent object with the search results. - The format of the result depends on the search_type: - - For CODE: JSON-formatted search results - - For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion - - For CHUNKS: String representation of the raw chunks - - For INSIGHTS: Formatted string showing node relationships - - For other types: String representation of the search results - - Notes - ----- - - Different search types produce different output formats - - The function handles the conversion between Cognee's internal result format and MCP's output format - """ - async def search_task(search_query: str, search_type: str) -> str: """Search the knowledge graph""" # NOTE: MCP uses stdout to communicate, we must redirect all output @@ -238,24 +132,7 @@ async def search(search_query: str, search_type: str) -> list: @mcp.tool() async def prune(): - """ - Reset the Cognee knowledge graph by removing all stored information. - - This function performs a complete reset of both the data layer and system layer - of the Cognee knowledge graph, removing all nodes, edges, and associated metadata. - It is typically used during development or when needing to start fresh with a new - knowledge base. - - Returns - ------- - list - A list containing a single TextContent object with confirmation of the prune operation. - - Notes - ----- - - This operation cannot be undone. All memory data will be permanently deleted. - - The function prunes both data content (using prune_data) and system metadata (using prune_system) - """ + """Reset the knowledge graph""" with redirect_stdout(sys.stderr): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) @@ -264,25 +141,7 @@ async def prune(): @mcp.tool() async def cognify_status(): - """ - Get the current status of the cognify pipeline. - - This function retrieves information about current and recently completed cognify operations - in the main_dataset. It provides details on progress, success/failure status, and statistics - about the processed data. - - Returns - ------- - list - A list containing a single TextContent object with the status information as a string. - The status includes information about active and completed jobs for the cognify_pipeline. - - Notes - ----- - - The function retrieves pipeline status specifically for the "cognify_pipeline" on the "main_dataset" - - Status information includes job progress, execution time, and completion status - - The status is returned in string format for easy reading - """ + """Get status of cognify pipeline""" with redirect_stdout(sys.stderr): user = await get_default_user() status = await get_pipeline_status( @@ -293,25 +152,7 @@ async def cognify_status(): @mcp.tool() async def codify_status(): - """ - Get the current status of the codify pipeline. - - This function retrieves information about current and recently completed codify operations - in the codebase dataset. It provides details on progress, success/failure status, and statistics - about the processed code repositories. - - Returns - ------- - list - A list containing a single TextContent object with the status information as a string. - The status includes information about active and completed jobs for the cognify_code_pipeline. - - Notes - ----- - - The function retrieves pipeline status specifically for the "cognify_code_pipeline" on the "codebase" dataset - - Status information includes job progress, execution time, and completion status - - The status is returned in string format for easy reading - """ + """Get status of codify pipeline""" with redirect_stdout(sys.stderr): user = await get_default_user() status = await get_pipeline_status( diff --git a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py index 16da8dbb0..9daebb760 100644 --- a/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py +++ b/cognee/infrastructure/databases/vector/milvus/MilvusAdapter.py @@ -178,18 +178,10 @@ class MilvusAdapter(VectorDBInterface): ): from pymilvus import MilvusException, exceptions - if limit <= 0: - return [] client = self.get_milvus_client() if query_text is None and query_vector is None: raise ValueError("One of query_text or query_vector must be provided!") - if not client.has_collection(collection_name=collection_name): - logger.warning( - f"Collection '{collection_name}' not found in MilvusAdapter.search; returning []." - ) - return [] - try: query_vector = query_vector or (await self.embed_data([query_text]))[0] @@ -216,19 +208,12 @@ class MilvusAdapter(VectorDBInterface): ) for result in results[0] ] - except exceptions.CollectionNotExistException: - logger.warning( - f"Collection '{collection_name}' not found (exception) in MilvusAdapter.search; returning []." - ) - return [] + except exceptions.CollectionNotExistException as error: + raise CollectionNotFoundError( + f"Collection '{collection_name}' does not exist!" + ) from error except MilvusException as e: - # Catch other Milvus errors that are "collection not found" (paranoid safety) - if "collection not found" in str(e).lower() or "schema" in str(e).lower(): - logger.warning( - f"Collection '{collection_name}' not found (MilvusException) in MilvusAdapter.search; returning []." - ) - return [] - logger.error(f"Error searching Milvus collection '{collection_name}': {e}") + logger.error(f"Error during search in collection '{collection_name}': {str(e)}") raise e async def batch_search( diff --git a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py index 951f73e6e..72d66e2de 100644 --- a/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +++ b/cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py @@ -159,24 +159,12 @@ class QDrantAdapter(VectorDBInterface): query_vector: Optional[List[float]] = None, limit: int = 15, with_vector: bool = False, - ) -> List[ScoredResult]: + ): from qdrant_client.http.exceptions import UnexpectedResponse if query_text is None and query_vector is None: raise InvalidValueError(message="One of query_text or query_vector must be provided!") - if limit <= 0: - return [] - - if not await self.has_collection(collection_name): - logger.warning( - f"Collection '{collection_name}' not found in QdrantAdapter.search; returning []." - ) - return [] - - if query_vector is None: - query_vector = (await self.embed_data([query_text]))[0] - try: client = self.get_qdrant_client() diff --git a/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py b/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py index f0b3497cd..dac6bc83f 100644 --- a/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +++ b/cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py @@ -113,7 +113,7 @@ class WeaviateAdapter(VectorDBInterface): # ) else: data_point: DataObject = data_points[0] - if await collection.data.exists(data_point.uuid): + if collection.data.exists(data_point.uuid): return await collection.data.update( uuid=data_point.uuid, vector=data_point.vector, diff --git a/cognee/modules/retrieval/utils/brute_force_triplet_search.py b/cognee/modules/retrieval/utils/brute_force_triplet_search.py index 2e6775e64..0a08fbd00 100644 --- a/cognee/modules/retrieval/utils/brute_force_triplet_search.py +++ b/cognee/modules/retrieval/utils/brute_force_triplet_search.py @@ -146,7 +146,7 @@ async def brute_force_search( async def search_in_collection(collection_name: str): try: return await vector_engine.search( - collection_name=collection_name, query_text=query, limit=50 + collection_name=collection_name, query_text=query, limit=0 ) except CollectionNotFoundError: return [] diff --git a/pyproject.toml b/pyproject.toml index 170788a1c..675ee101a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,27 +136,6 @@ Repository = "https://github.com/topoteretes/cognee" requires = ["hatchling"] build-backend = "hatchling.build" -[tool.hatch.build] -exclude = [ - "/bin", - "/dist", - "/.data", - "/.github", - "/alembic", - "/distributed", - "/deployment", - "/cognee-mcp", - "/cognee-frontend", - "/examples", - "/helm", - "/licenses", - "/logs", - "/notebooks", - "/profiling", - "/tests", - "/tools", -] - [tool.ruff] line-length = 100 exclude = [ diff --git a/uv.lock b/uv.lock index e7c0d79f8..0e6236625 100644 --- a/uv.lock +++ b/uv.lock @@ -860,7 +860,7 @@ wheels = [ [[package]] name = "cognee" -version = "0.1.40" +version = "0.1.39" source = { editable = "." } dependencies = [ { name = "aiofiles" },