merge done
This commit is contained in:
commit
a5b28983bd
9 changed files with 18 additions and 225 deletions
|
|
@ -97,7 +97,7 @@ git push origin feature/your-feature-name
|
||||||
|
|
||||||
2. Create a Pull Request:
|
2. Create a Pull Request:
|
||||||
- Go to the [**cognee** repository](https://github.com/topoteretes/cognee)
|
- Go to the [**cognee** repository](https://github.com/topoteretes/cognee)
|
||||||
- Click "Compare & Pull Request" and open a PR against the dev branch
|
- Click "Compare & Pull Request"
|
||||||
- Fill in the PR template with details about your changes
|
- Fill in the PR template with details about your changes
|
||||||
|
|
||||||
## 5. 📜 Developer Certificate of Origin (DCO)
|
## 5. 📜 Developer Certificate of Origin (DCO)
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
# For local cognee repo usage, uncomment the line below and add the absolute path to cognee
|
# For local cognee repo usage, uncomment the line below and add the absolute path to cognee
|
||||||
#"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
|
#"cognee[postgres,codegraph,gemini,huggingface] @ file:/Users/<username>/Desktop/cognee",
|
||||||
"cognee[postgres,codegraph,gemini,huggingface,docs]==0.1.40",
|
"cognee[postgres,codegraph,gemini,huggingface]==0.1.40",
|
||||||
"fastmcp>=1.0",
|
"fastmcp>=1.0",
|
||||||
"mcp==1.5.0",
|
"mcp==1.5.0",
|
||||||
"uv>=0.6.3",
|
"uv>=0.6.3",
|
||||||
|
|
|
||||||
|
|
@ -24,46 +24,9 @@ log_file = get_log_file_location()
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def cognify(data: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
|
async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> list:
|
||||||
"""
|
|
||||||
Transform data into a structured knowledge graph in Cognee's memory layer.
|
|
||||||
|
|
||||||
This function launches a background task that processes the provided text/file location and
|
|
||||||
generates a knowledge graph representation. The function returns immediately while
|
|
||||||
the processing continues in the background due to MCP timeout constraints.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
data : str
|
|
||||||
The data to be processed and transformed into structured knowledge.
|
|
||||||
This can include natural language, file location, or any text-based information
|
|
||||||
that should become part of the agent's memory.
|
|
||||||
|
|
||||||
graph_model_file : str, optional
|
|
||||||
Path to a custom schema file that defines the structure of the generated knowledge graph.
|
|
||||||
If provided, this file will be loaded using importlib to create a custom graph model.
|
|
||||||
Default is None, which uses Cognee's built-in KnowledgeGraph model.
|
|
||||||
|
|
||||||
graph_model_name : str, optional
|
|
||||||
Name of the class within the graph_model_file to instantiate as the graph model.
|
|
||||||
Required if graph_model_file is specified.
|
|
||||||
Default is None, which uses the default KnowledgeGraph class.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
list
|
|
||||||
A list containing a single TextContent object with information about the
|
|
||||||
background task launch and how to check its status.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
- The function launches a background task and returns immediately
|
|
||||||
- The actual cognify process may take significant time depending on text length
|
|
||||||
- Use the cognify_status tool to check the progress of the operation
|
|
||||||
"""
|
|
||||||
|
|
||||||
async def cognify_task(
|
async def cognify_task(
|
||||||
data: str, graph_model_file: str = None, graph_model_name: str = None
|
text: str, graph_model_file: str = None, graph_model_name: str = None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Build knowledge graph from the input text"""
|
"""Build knowledge graph from the input text"""
|
||||||
# NOTE: MCP uses stdout to communicate, we must redirect all output
|
# NOTE: MCP uses stdout to communicate, we must redirect all output
|
||||||
|
|
@ -75,7 +38,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
|
||||||
else:
|
else:
|
||||||
graph_model = KnowledgeGraph
|
graph_model = KnowledgeGraph
|
||||||
|
|
||||||
await cognee.add(data)
|
await cognee.add(text)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await cognee.cognify(graph_model=graph_model)
|
await cognee.cognify(graph_model=graph_model)
|
||||||
|
|
@ -86,7 +49,7 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
|
||||||
|
|
||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
cognify_task(
|
cognify_task(
|
||||||
data=data,
|
text=text,
|
||||||
graph_model_file=graph_model_file,
|
graph_model_file=graph_model_file,
|
||||||
graph_model_name=graph_model_name,
|
graph_model_name=graph_model_name,
|
||||||
)
|
)
|
||||||
|
|
@ -108,35 +71,6 @@ async def cognify(data: str, graph_model_file: str = None, graph_model_name: str
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def codify(repo_path: str) -> list:
|
async def codify(repo_path: str) -> list:
|
||||||
"""
|
|
||||||
Analyze and generate a code-specific knowledge graph from a software repository.
|
|
||||||
|
|
||||||
This function launches a background task that processes the provided repository
|
|
||||||
and builds a code knowledge graph. The function returns immediately while
|
|
||||||
the processing continues in the background due to MCP timeout constraints.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
repo_path : str
|
|
||||||
Path to the code repository to analyze. This can be a local file path or a
|
|
||||||
relative path to a repository. The path should point to the root of the
|
|
||||||
repository or a specific directory within it.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
list
|
|
||||||
A list containing a single TextContent object with information about the
|
|
||||||
background task launch and how to check its status.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
- The function launches a background task and returns immediately
|
|
||||||
- The code graph generation may take significant time for larger repositories
|
|
||||||
- Use the codify_status tool to check the progress of the operation
|
|
||||||
- Process results are logged to the standard Cognee log file
|
|
||||||
- All stdout is redirected to stderr to maintain MCP communication integrity
|
|
||||||
"""
|
|
||||||
|
|
||||||
async def codify_task(repo_path: str):
|
async def codify_task(repo_path: str):
|
||||||
# NOTE: MCP uses stdout to communicate, we must redirect all output
|
# NOTE: MCP uses stdout to communicate, we must redirect all output
|
||||||
# going to stdout ( like the print function ) to stderr.
|
# going to stdout ( like the print function ) to stderr.
|
||||||
|
|
@ -169,46 +103,6 @@ async def codify(repo_path: str) -> list:
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def search(search_query: str, search_type: str) -> list:
|
async def search(search_query: str, search_type: str) -> list:
|
||||||
"""
|
|
||||||
Search the Cognee knowledge graph for information relevant to the query.
|
|
||||||
|
|
||||||
This function executes a search against the Cognee knowledge graph using the
|
|
||||||
specified query and search type. It returns formatted results based on the
|
|
||||||
search type selected.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
search_query : str
|
|
||||||
The search query in natural language. This can be a question, instruction, or
|
|
||||||
any text that expresses what information is needed from the knowledge graph.
|
|
||||||
|
|
||||||
search_type : str
|
|
||||||
The type of search to perform. Valid options include:
|
|
||||||
- "GRAPH_COMPLETION": Returns an LLM response based on the search query and Cognee's memory
|
|
||||||
- "RAG_COMPLETION": Returns an LLM response based on the search query and standard RAG data
|
|
||||||
- "CODE": Returns code-related knowledge in JSON format
|
|
||||||
- "CHUNKS": Returns raw text chunks from the knowledge graph
|
|
||||||
- "INSIGHTS": Returns relationships between nodes in readable format
|
|
||||||
|
|
||||||
The search_type is case-insensitive and will be converted to uppercase.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
list
|
|
||||||
A list containing a single TextContent object with the search results.
|
|
||||||
The format of the result depends on the search_type:
|
|
||||||
- For CODE: JSON-formatted search results
|
|
||||||
- For GRAPH_COMPLETION/RAG_COMPLETION: A single text completion
|
|
||||||
- For CHUNKS: String representation of the raw chunks
|
|
||||||
- For INSIGHTS: Formatted string showing node relationships
|
|
||||||
- For other types: String representation of the search results
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
- Different search types produce different output formats
|
|
||||||
- The function handles the conversion between Cognee's internal result format and MCP's output format
|
|
||||||
"""
|
|
||||||
|
|
||||||
async def search_task(search_query: str, search_type: str) -> str:
|
async def search_task(search_query: str, search_type: str) -> str:
|
||||||
"""Search the knowledge graph"""
|
"""Search the knowledge graph"""
|
||||||
# NOTE: MCP uses stdout to communicate, we must redirect all output
|
# NOTE: MCP uses stdout to communicate, we must redirect all output
|
||||||
|
|
@ -238,24 +132,7 @@ async def search(search_query: str, search_type: str) -> list:
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def prune():
|
async def prune():
|
||||||
"""
|
"""Reset the knowledge graph"""
|
||||||
Reset the Cognee knowledge graph by removing all stored information.
|
|
||||||
|
|
||||||
This function performs a complete reset of both the data layer and system layer
|
|
||||||
of the Cognee knowledge graph, removing all nodes, edges, and associated metadata.
|
|
||||||
It is typically used during development or when needing to start fresh with a new
|
|
||||||
knowledge base.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
list
|
|
||||||
A list containing a single TextContent object with confirmation of the prune operation.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
- This operation cannot be undone. All memory data will be permanently deleted.
|
|
||||||
- The function prunes both data content (using prune_data) and system metadata (using prune_system)
|
|
||||||
"""
|
|
||||||
with redirect_stdout(sys.stderr):
|
with redirect_stdout(sys.stderr):
|
||||||
await cognee.prune.prune_data()
|
await cognee.prune.prune_data()
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
|
@ -264,25 +141,7 @@ async def prune():
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def cognify_status():
|
async def cognify_status():
|
||||||
"""
|
"""Get status of cognify pipeline"""
|
||||||
Get the current status of the cognify pipeline.
|
|
||||||
|
|
||||||
This function retrieves information about current and recently completed cognify operations
|
|
||||||
in the main_dataset. It provides details on progress, success/failure status, and statistics
|
|
||||||
about the processed data.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
list
|
|
||||||
A list containing a single TextContent object with the status information as a string.
|
|
||||||
The status includes information about active and completed jobs for the cognify_pipeline.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
- The function retrieves pipeline status specifically for the "cognify_pipeline" on the "main_dataset"
|
|
||||||
- Status information includes job progress, execution time, and completion status
|
|
||||||
- The status is returned in string format for easy reading
|
|
||||||
"""
|
|
||||||
with redirect_stdout(sys.stderr):
|
with redirect_stdout(sys.stderr):
|
||||||
user = await get_default_user()
|
user = await get_default_user()
|
||||||
status = await get_pipeline_status(
|
status = await get_pipeline_status(
|
||||||
|
|
@ -293,25 +152,7 @@ async def cognify_status():
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
async def codify_status():
|
async def codify_status():
|
||||||
"""
|
"""Get status of codify pipeline"""
|
||||||
Get the current status of the codify pipeline.
|
|
||||||
|
|
||||||
This function retrieves information about current and recently completed codify operations
|
|
||||||
in the codebase dataset. It provides details on progress, success/failure status, and statistics
|
|
||||||
about the processed code repositories.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
list
|
|
||||||
A list containing a single TextContent object with the status information as a string.
|
|
||||||
The status includes information about active and completed jobs for the cognify_code_pipeline.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
- The function retrieves pipeline status specifically for the "cognify_code_pipeline" on the "codebase" dataset
|
|
||||||
- Status information includes job progress, execution time, and completion status
|
|
||||||
- The status is returned in string format for easy reading
|
|
||||||
"""
|
|
||||||
with redirect_stdout(sys.stderr):
|
with redirect_stdout(sys.stderr):
|
||||||
user = await get_default_user()
|
user = await get_default_user()
|
||||||
status = await get_pipeline_status(
|
status = await get_pipeline_status(
|
||||||
|
|
|
||||||
|
|
@ -178,18 +178,10 @@ class MilvusAdapter(VectorDBInterface):
|
||||||
):
|
):
|
||||||
from pymilvus import MilvusException, exceptions
|
from pymilvus import MilvusException, exceptions
|
||||||
|
|
||||||
if limit <= 0:
|
|
||||||
return []
|
|
||||||
client = self.get_milvus_client()
|
client = self.get_milvus_client()
|
||||||
if query_text is None and query_vector is None:
|
if query_text is None and query_vector is None:
|
||||||
raise ValueError("One of query_text or query_vector must be provided!")
|
raise ValueError("One of query_text or query_vector must be provided!")
|
||||||
|
|
||||||
if not client.has_collection(collection_name=collection_name):
|
|
||||||
logger.warning(
|
|
||||||
f"Collection '{collection_name}' not found in MilvusAdapter.search; returning []."
|
|
||||||
)
|
|
||||||
return []
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
query_vector = query_vector or (await self.embed_data([query_text]))[0]
|
query_vector = query_vector or (await self.embed_data([query_text]))[0]
|
||||||
|
|
||||||
|
|
@ -216,19 +208,12 @@ class MilvusAdapter(VectorDBInterface):
|
||||||
)
|
)
|
||||||
for result in results[0]
|
for result in results[0]
|
||||||
]
|
]
|
||||||
except exceptions.CollectionNotExistException:
|
except exceptions.CollectionNotExistException as error:
|
||||||
logger.warning(
|
raise CollectionNotFoundError(
|
||||||
f"Collection '{collection_name}' not found (exception) in MilvusAdapter.search; returning []."
|
f"Collection '{collection_name}' does not exist!"
|
||||||
)
|
) from error
|
||||||
return []
|
|
||||||
except MilvusException as e:
|
except MilvusException as e:
|
||||||
# Catch other Milvus errors that are "collection not found" (paranoid safety)
|
logger.error(f"Error during search in collection '{collection_name}': {str(e)}")
|
||||||
if "collection not found" in str(e).lower() or "schema" in str(e).lower():
|
|
||||||
logger.warning(
|
|
||||||
f"Collection '{collection_name}' not found (MilvusException) in MilvusAdapter.search; returning []."
|
|
||||||
)
|
|
||||||
return []
|
|
||||||
logger.error(f"Error searching Milvus collection '{collection_name}': {e}")
|
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def batch_search(
|
async def batch_search(
|
||||||
|
|
|
||||||
|
|
@ -159,24 +159,12 @@ class QDrantAdapter(VectorDBInterface):
|
||||||
query_vector: Optional[List[float]] = None,
|
query_vector: Optional[List[float]] = None,
|
||||||
limit: int = 15,
|
limit: int = 15,
|
||||||
with_vector: bool = False,
|
with_vector: bool = False,
|
||||||
) -> List[ScoredResult]:
|
):
|
||||||
from qdrant_client.http.exceptions import UnexpectedResponse
|
from qdrant_client.http.exceptions import UnexpectedResponse
|
||||||
|
|
||||||
if query_text is None and query_vector is None:
|
if query_text is None and query_vector is None:
|
||||||
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
|
raise InvalidValueError(message="One of query_text or query_vector must be provided!")
|
||||||
|
|
||||||
if limit <= 0:
|
|
||||||
return []
|
|
||||||
|
|
||||||
if not await self.has_collection(collection_name):
|
|
||||||
logger.warning(
|
|
||||||
f"Collection '{collection_name}' not found in QdrantAdapter.search; returning []."
|
|
||||||
)
|
|
||||||
return []
|
|
||||||
|
|
||||||
if query_vector is None:
|
|
||||||
query_vector = (await self.embed_data([query_text]))[0]
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client = self.get_qdrant_client()
|
client = self.get_qdrant_client()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -113,7 +113,7 @@ class WeaviateAdapter(VectorDBInterface):
|
||||||
# )
|
# )
|
||||||
else:
|
else:
|
||||||
data_point: DataObject = data_points[0]
|
data_point: DataObject = data_points[0]
|
||||||
if await collection.data.exists(data_point.uuid):
|
if collection.data.exists(data_point.uuid):
|
||||||
return await collection.data.update(
|
return await collection.data.update(
|
||||||
uuid=data_point.uuid,
|
uuid=data_point.uuid,
|
||||||
vector=data_point.vector,
|
vector=data_point.vector,
|
||||||
|
|
|
||||||
|
|
@ -146,7 +146,7 @@ async def brute_force_search(
|
||||||
async def search_in_collection(collection_name: str):
|
async def search_in_collection(collection_name: str):
|
||||||
try:
|
try:
|
||||||
return await vector_engine.search(
|
return await vector_engine.search(
|
||||||
collection_name=collection_name, query_text=query, limit=50
|
collection_name=collection_name, query_text=query, limit=0
|
||||||
)
|
)
|
||||||
except CollectionNotFoundError:
|
except CollectionNotFoundError:
|
||||||
return []
|
return []
|
||||||
|
|
|
||||||
|
|
@ -136,27 +136,6 @@ Repository = "https://github.com/topoteretes/cognee"
|
||||||
requires = ["hatchling"]
|
requires = ["hatchling"]
|
||||||
build-backend = "hatchling.build"
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
[tool.hatch.build]
|
|
||||||
exclude = [
|
|
||||||
"/bin",
|
|
||||||
"/dist",
|
|
||||||
"/.data",
|
|
||||||
"/.github",
|
|
||||||
"/alembic",
|
|
||||||
"/distributed",
|
|
||||||
"/deployment",
|
|
||||||
"/cognee-mcp",
|
|
||||||
"/cognee-frontend",
|
|
||||||
"/examples",
|
|
||||||
"/helm",
|
|
||||||
"/licenses",
|
|
||||||
"/logs",
|
|
||||||
"/notebooks",
|
|
||||||
"/profiling",
|
|
||||||
"/tests",
|
|
||||||
"/tools",
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
line-length = 100
|
line-length = 100
|
||||||
exclude = [
|
exclude = [
|
||||||
|
|
|
||||||
2
uv.lock
generated
2
uv.lock
generated
|
|
@ -860,7 +860,7 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cognee"
|
name = "cognee"
|
||||||
version = "0.1.40"
|
version = "0.1.39"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "aiofiles" },
|
{ name = "aiofiles" },
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue