merge done

This commit is contained in:
vasilije 2025-05-19 13:22:52 +02:00
commit a5b28983bd
9 changed files with 18 additions and 225 deletions

View file

@@ -97,7 +97,7 @@ git push origin feature/your-feature-name
2. Create a Pull Request:
- Go to the [**cognee** repository](https://github.com/topoteretes/cognee)
- Click "Compare & Pull Request" and open a PR against dev branch
- Click "Compare & Pull Request"
- Fill in the PR template with details about your changes
## 5. 📜 Developer Certificate of Origin (DCO)

View file

@@ -8,7 +8,7 @@ requires-python = ">=3.10"
dependencies = [
# For local cognee repo usage remove comment below and add absolute path to cognee
#"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
"cognee[postgres,codegraph,gemini,huggingface,docs]==0.1.40",
"cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
"fastmcp>=1.0",
"mcp==1.5.0",
"uv>=0.6.3",

View file

@@ -24,46 +24,9 @@ log_file = get_log_file_location()
@mcp.tool()
async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
"""
Transform data into a structured knowledge graph in Cognee's memory layer.
This function launches a background task that processes the provided text/file location and
generates a knowledge graph representation. The function returns immediately while
the processing continues in the background due to MCP timeout constraints.
Parameters
----------
data : str
The data to be processed and transformed into structured knowledge.
This can include natural language, file location, or any text-based information
that should become part of the agent's memory.
graph_model_file : str, optional
Path to a custom schema file that defines the structure of the generated knowledge graph.
If provided, this file will be loaded using importlib to create a custom graph model.
Default is None, which uses Cognee's built-in KnowledgeGraph model.
graph_model_name : str, optional
Name of the class within the graph_model_file to instantiate as the graph model.
Required if graph_model_file is specified.
Default is None, which uses the default KnowledgeGraph class.
Returns
-------
list
A list containing a single TextContent object with information about the
background task launch and how to check its status.
Notes
-----
- The function launches a background task and returns immediately
- The actual cognify process may take significant time depending on text length
- Use the cognify_status tool to check the progress of the operation
"""
async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
async def cognify_task(
data: str, graph_model_file: str = None, graph_model_name: str = None
text: str, graph_model_file: str = None, graph_model_name: str = None
) -> str:
"""Build knowledge graph from the input text"""
# NOTE: MCP uses stdout to communicate, we must redirect all output
@@ -75,7 +38,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
else:
graph_model = KnowledgeGraph
await cognee.add(data)
await cognee.add(text)
try:
await cognee.cognify(graph_model=graph_model)
@@ -86,7 +49,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
asyncio.create_task(
cognify_task(
data=data,
text=text,
graph_model_file=graph_model_file,
graph_model_name=graph_model_name,
)
@@ -108,35 +71,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
@mcp.tool()
async def codify(repo_path: str) -> list:
"""
Analyze and generate a code-specific knowledge graph from a software repository.
This function launches a background task that processes the provided repository
and builds a code knowledge graph. The function returns immediately while
the processing continues in the background due to MCP timeout constraints.
Parameters
----------
repo_path : str
Path to the code repository to analyze. This can be a local file path or a
relative path to a repository. The path should point to the root of the
repository or a specific directory within it.
Returns
-------
list
A list containing a single TextContent object with information about the
background task launch and how to check its status.
Notes
-----
- The function launches a background task and returns immediately
- The code graph generation may take significant time for larger repositories
- Use the codify_status tool to check the progress of the operation
- Process results are logged to the standard Cognee log file
- All stdout is redirected to stderr to maintain MCP communication integrity
"""
async def codify_task(repo_path: str):
# NOTE: MCP uses stdout to communicate, we must redirect all output
# going to stdout ( like the print function ) to stderr.
@@ -169,46 +103,6 @@ async def codify(repo_path: str) -> list:
@mcp.tool()
async def search(search_query: str, search_type: str) -> list:
"""
Search the Cognee knowledge graph for information relevant to the query.
This function executes a search against the Cognee knowledge graph using the
specified query and search type. It returns formatted results based on the
search type selected.
Parameters
----------
search_query : str
The search query in natural language. This can be a question, instruction, or
any text that expresses what information is needed from the knowledge graph.
search_type : str
The type of search to perform. Valid options include:
- "GRAPH_COMPLETION": Returns an LLM response based on the search query and Cognee's memory
- "RAG_COMPLETION": Returns an LLM response based on the search query and standard RAG data
- "CODE": Returns code-related knowledge in JSON format
- "CHUNKS": Returns raw text chunks from the knowledge graph
- "INSIGHTS": Returns relationships between nodes in readable format
The search_type is case-insensitive and will be converted to uppercase.
Returns
-------
list
A list containing a single TextContent object with the search results.
The format of the result depends on the search_type:
- For CODE: JSON-formatted search results
- For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion
- For CHUNKS: String representation of the raw chunks
- For INSIGHTS: Formatted string showing node relationships
- For other types: String representation of the search results
Notes
-----
- Different search types produce different output formats
- The function handles the conversion between Cognee's internal result format and MCP's output format
"""
async def search_task(search_query: str, search_type: str) -> str:
"""Search the knowledge graph"""
# NOTE: MCP uses stdout to communicate, we must redirect all output
@@ -238,24 +132,7 @@ async def search(search_query: str, search_type: str) -> list:
@mcp.tool()
async def prune():
"""
Reset the Cognee knowledge graph by removing all stored information.
This function performs a complete reset of both the data layer and system layer
of the Cognee knowledge graph, removing all nodes, edges, and associated metadata.
It is typically used during development or when needing to start fresh with a new
knowledge base.
Returns
-------
list
A list containing a single TextContent object with confirmation of the prune operation.
Notes
-----
- This operation cannot be undone. All memory data will be permanently deleted.
- The function prunes both data content (using prune_data) and system metadata (using prune_system)
"""
"""Reset the knowledge graph"""
with redirect_stdout(sys.stderr):
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
@@ -264,25 +141,7 @@ async def prune():
@mcp.tool()
async def cognify_status():
"""
Get the current status of the cognify pipeline.
This function retrieves information about current and recently completed cognify operations
in the main_dataset. It provides details on progress, success/failure status, and statistics
about the processed data.
Returns
-------
list
A list containing a single TextContent object with the status information as a string.
The status includes information about active and completed jobs for the cognify_pipeline.
Notes
-----
- The function retrieves pipeline status specifically for the "cognify_pipeline" on the "main_dataset"
- Status information includes job progress, execution time, and completion status
- The status is returned in string format for easy reading
"""
"""Get status of cognify pipeline"""
with redirect_stdout(sys.stderr):
user = await get_default_user()
status = await get_pipeline_status(
@@ -293,25 +152,7 @@ async def cognify_status():
@mcp.tool()
async def codify_status():
"""
Get the current status of the codify pipeline.
This function retrieves information about current and recently completed codify operations
in the codebase dataset. It provides details on progress, success/failure status, and statistics
about the processed code repositories.
Returns
-------
list
A list containing a single TextContent object with the status information as a string.
The status includes information about active and completed jobs for the cognify_code_pipeline.
Notes
-----
- The function retrieves pipeline status specifically for the "cognify_code_pipeline" on the "codebase" dataset
- Status information includes job progress, execution time, and completion status
- The status is returned in string format for easy reading
"""
"""Get status of codify pipeline"""
with redirect_stdout(sys.stderr):
user = await get_default_user()
status = await get_pipeline_status(

View file

@@ -178,18 +178,10 @@ class MilvusAdapter(VectorDBInterface):
):
from pymilvus import MilvusException, exceptions
if limit <= 0:
return []
client = self.get_milvus_client()
if query_text is None and query_vector is None:
raise ValueError("One of query_text or query_vector must be provided!")
if not client.has_collection(collection_name=collection_name):
logger.warning(
f"Collection '{collection_name}' not found in MilvusAdapter.search; returning []."
)
return []
try:
query_vector = query_vector or (await self.embed_data([query_text]))[0]
@@ -216,19 +208,12 @@ class MilvusAdapter(VectorDBInterface):
)
for result in results[0]
]
except exceptions.CollectionNotExistException:
logger.warning(
f"Collection '{collection_name}' not found (exception) in MilvusAdapter.search; returning []."
)
return []
except exceptions.CollectionNotExistException as error:
raise CollectionNotFoundError(
f"Collection '{collection_name}' does not exist!"
) from error
except MilvusException as e:
# Catch other Milvus errors that are "collection not found" (paranoid safety)
if "collection not found" in str(e).lower() or "schema" in str(e).lower():
logger.warning(
f"Collection '{collection_name}' not found (MilvusException) in MilvusAdapter.search; returning []."
)
return []
logger.error(f"Error searching Milvus collection '{collection_name}': {e}")
logger.error(f"Error during search in collection '{collection_name}': {str(e)}")
raise e
async def batch_search(

View file

@@ -159,24 +159,12 @@ class QDrantAdapter(VectorDBInterface):
query_vector: Optional[List[float]] = None,
limit: int = 15,
with_vector: bool = False,
) -> List[ScoredResult]:
):
from qdrant_client.http.exceptions import UnexpectedResponse
if query_text is None and query_vector is None:
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
if limit <= 0:
return []
if not await self.has_collection(collection_name):
logger.warning(
f"Collection '{collection_name}' not found in QdrantAdapter.search; returning []."
)
return []
if query_vector is None:
query_vector = (await self.embed_data([query_text]))[0]
try:
client = self.get_qdrant_client()

View file

@@ -113,7 +113,7 @@ class WeaviateAdapter(VectorDBInterface):
# )
else:
data_point: DataObject = data_points[0]
if await collection.data.exists(data_point.uuid):
if collection.data.exists(data_point.uuid):
return await collection.data.update(
uuid=data_point.uuid,
vector=data_point.vector,

View file

@@ -146,7 +146,7 @@ async def brute_force_search(
async def search_in_collection(collection_name: str):
try:
return await vector_engine.search(
collection_name=collection_name, query_text=query, limit=50
collection_name=collection_name, query_text=query, limit=0
)
except CollectionNotFoundError:
return []

View file

@@ -136,27 +136,6 @@ Repository = "https://github.com/topoteretes/cognee"
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build]
exclude = [
"/bin",
"/dist",
"/.data",
"/.github",
"/alembic",
"/distributed",
"/deployment",
"/cognee-mcp",
"/cognee-frontend",
"/examples",
"/helm",
"/licenses",
"/logs",
"/notebooks",
"/profiling",
"/tests",
"/tools",
]
[tool.ruff]
line-length = 100
exclude = [

2
uv.lock generated
View file

@@ -860,7 +860,7 @@ wheels = [
[[package]]
name = "cognee"
version = "0.1.40"
version = "0.1.39"
source = { editable = "." }
dependencies = [
{ name = "aiofiles" },