Merge branch 'dev' into add-smithery
commit f2c3b21f9e
9 changed files with 4877 additions and 154 deletions
87  README.md

@@ -193,93 +193,14 @@ if __name__ == '__main__':
When you run this script, you will see step-by-step messages in the console that help you trace the execution flow and understand what the script is doing at each stage.

A version of this example is here: `examples/python/simple_example.py`

### Create your own memory store

### Understand our architecture

The cognee framework consists of tasks that can be grouped into pipelines.
Each task can be an independent piece of business logic that can be tied to other tasks to form a pipeline.
These tasks persist data into your memory store, enabling you to search for relevant context from past conversations, documents, or any other data you have stored.
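
A minimal sketch of that flow, assuming the default configuration (it reuses the same `cognee.add`, `cognee.cognify`, and `cognee.search(SearchType.INSIGHTS, ...)` calls shown elsewhere in this commit):

```python
import asyncio

import cognee
from cognee.api.v1.search import SearchType


async def main():
    # Add a small piece of information to the metastore.
    await cognee.add("Natural language processing (NLP) is an interdisciplinary subfield of computer science.")

    # Run the default cognify pipeline: chunk the data, extract a graph, and index it.
    await cognee.cognify()

    # Search the memory store for related context; INSIGHTS returns graph relationships.
    results = await cognee.search(SearchType.INSIGHTS, query_text="Tell me about NLP")
    print(results)


if __name__ == "__main__":
    asyncio.run(main())
```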

### Example: Classify your documents

Here is an example of how it looks for a default cognify pipeline:

1. To prepare the data for the pipeline run, first we need to add it to our metastore and normalize it:

Start with:

```python
text = """Natural language processing (NLP) is an interdisciplinary
subfield of computer science and information retrieval"""

await cognee.add(text) # Add a new piece of information
```

2. In the next step we make a task. The task can be any business logic we need, but the important part is that it should be encapsulated in one function.

Here we show an example of creating a naive LLM classifier that takes a Pydantic model and then stores the data in both the graph and vector stores after analyzing each chunk.
We provide just a snippet for reference, but feel free to check out the implementation in our repo.

```python
async def chunk_naive_llm_classifier(
    data_chunks: list[DocumentChunk],
    classification_model: Type[BaseModel]
):
    # Extract classifications asynchronously
    chunk_classifications = await asyncio.gather(
        *(extract_categories(chunk.text, classification_model) for chunk in data_chunks)
    )

    # Collect classification data points using a set to avoid duplicates
    classification_data_points = {
        uuid5(NAMESPACE_OID, cls.label.type)
        for cls in chunk_classifications
    } | {
        uuid5(NAMESPACE_OID, subclass.value)
        for cls in chunk_classifications
        for subclass in cls.label.subclass
    }

    vector_engine = get_vector_engine()
    collection_name = "classification"

    # Define the payload schema
    class Keyword(BaseModel):
        uuid: str
        text: str
        chunk_id: str
        document_id: str

    # Ensure the collection exists and retrieve existing data points
    if not await vector_engine.has_collection(collection_name):
        await vector_engine.create_collection(collection_name, payload_schema=Keyword)
        existing_points_map = {}
    else:
        existing_points_map = {}

    ...

    return data_chunks
```

We have many tasks that can be used in your pipelines, and you can also create your own tasks to fit your business logic.

3. Once we have our tasks, it is time to group them into a pipeline.
This simplified snippet demonstrates how tasks can be added to a pipeline, and how they can pass information forward from one to another.

```python
Task(
    chunk_naive_llm_classifier,
    classification_model = cognee_config.classification_model,
)

pipeline = run_tasks(tasks, documents)
```
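
To run such a pipeline, the tasks and the input documents are handed to `run_tasks`. Below is a minimal sketch, assuming `run_tasks` returns an async generator of intermediate results; `Task`, `chunk_naive_llm_classifier`, and `cognee_config` are the ones from the snippets above, and import paths are omitted because they may differ in the repo.

```python
import asyncio


async def run_classification_pipeline(documents):
    # Wrap the task from step 2; Task and chunk_naive_llm_classifier are defined above.
    tasks = [
        Task(
            chunk_naive_llm_classifier,
            classification_model = cognee_config.classification_model,
        ),
    ]

    # Assumption: run_tasks yields each task's output as an async generator.
    pipeline = run_tasks(tasks, documents)

    async for result in pipeline:
        print(result)


# `documents` would be the data prepared in step 1.
asyncio.run(run_classification_pipeline(documents))
```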

To see the working code, check the cognee.api.v1.cognify default pipeline in our repo.

<div align="center">
  <img src="assets/cognee_diagram.png" alt="cognee concept diagram" width="50%" />
</div>

## Vector retrieval, Graphs and LLMs
Binary file not shown. (Before: 77 KiB)

BIN  assets/cognee_diagram.png (new file)
Binary file not shown. (After: 428 KiB)
@@ -1,5 +1,6 @@
# cognee MCP server

### Installing via Smithery

To install Cognee for Claude Desktop automatically via [Smithery](https://smithery.ai/server/cognee):

@@ -10,57 +11,44 @@ npx -y @smithery/cli install cognee --client claude

### Installing Manually
A MCP server project
=======
1. Clone the [cognee](www.github.com/topoteretes/cognee) repo

Create a boilerplate server:

2. Install dependencies

```jsx
uvx create-mcp-server
```

1. The command will ask you to name your server, e.g. mcp_cognee

2. Answer “Y” to connect with Claude
Then run

```jsx
cd mcp_cognee
cd cognee-mcp
uv sync --dev --all-extras
```

Activate the venv with
3. Activate the venv with

```jsx
source .venv/bin/activate
```

This should already add the new server to your Claude config, but if not, add these lines manually:
4. Add the new server to your Claude config:

```
"mcpcognee": {
  "command": "uv",
  "args": [
    "--directory",
    "/Users/your_username/mcp/mcp_cognee",
    "run",
    "mcpcognee"
  ],
  "env": {
    "ENV": "local",
    "TOKENIZERS_PARALLELISM": "false",
    "LLM_API_KEY": "add_your_api_key_here",
    "GRAPH_DATABASE_PROVIDER": "neo4j",
    "GRAPH_DATABASE_URL": "bolt://localhost:7687",
    "GRAPH_DATABASE_USERNAME": "add_username_here",
    "GRAPH_DATABASE_PASSWORD": "add_pwd_here",
    "VECTOR_DB_PROVIDER": "lancedb",
    "DB_PROVIDER": "sqlite",
    "DB_NAME": "postgres"
  }
}
```

```json
"cognee": {
  "command": "uv",
  "args": [
    "--directory",
    "/{Absolute path to cognee directory}/cognee-mcp",
    "run",
    "cognee"
  ],
  "env": {
    "ENV": "local",
    "TOKENIZERS_PARALLELISM": "false",
    "LLM_API_KEY": "add_your_api_key_here"
  }
}
```

Then, edit the pyproject.toml in your new folder so that it includes packages from the cognee requirements. Use the pyproject.toml in your cognee library for this, but match the syntax of the automatically generated pyproject.toml so that it is compatible with uv.

Define the cognify tool in server.py
Restart your Claude desktop.
@@ -8,7 +8,7 @@ def main():
    asyncio.run(server.main())

# Optionally expose other important items at package level
__all__ = ['main', 'server']
__all__ = ["main", "server"]

if __name__ == "__main__":
    main()
@@ -1,5 +1,6 @@
import importlib.util
import os
import asyncio
from contextlib import redirect_stderr, redirect_stdout

import cognee

@@ -9,15 +10,17 @@ from cognee.api.v1.search import SearchType
from cognee.shared.data_models import KnowledgeGraph
from mcp.server import NotificationOptions, Server
from mcp.server.models import InitializationOptions
from pydantic import AnyUrl, BaseModel

server = Server("mcpcognee")
server = Server("cognee-mcp")


def node_to_string(node):
    keys_to_keep = ["chunk_index", "topological_rank", "cut_type", "id", "text"]
    keyset = set(keys_to_keep) & node.keys()
    return "Node(" + " ".join([key + ": " + str(node[key]) + "," for key in keyset]) + ")"
    # keys_to_keep = ["chunk_index", "topological_rank", "cut_type", "id", "text"]
    # keyset = set(keys_to_keep) & node.keys()
    # return "Node(" + " ".join([key + ": " + str(node[key]) + "," for key in keyset]) + ")"
    node_data = ", ".join([f"{key}: \"{value}\"" for key, value in node.items() if key in ["id", "name"]])

    return f"Node({node_data})"


def retrieved_edges_to_string(search_results):

@@ -49,60 +52,107 @@ async def handle_list_tools() -> list[types.Tool]:
    """
    return [
        types.Tool(
            name="Cognify_and_search",
            description="Build knowledge graph from the input text and search in it.",
            inputSchema={
            name = "cognify",
            description = "Build knowledge graph from the input text.",
            inputSchema = {
                "type": "object",
                "properties": {
                    "text": {"type": "string"},
                    "search_query": {"type": "string"},
                    "graph_model_file": {"type": "string"},
                    "graph_model_name": {"type": "string"},
                },
                "required": ["text", "search_query"],
                "required": ["text"],
            },
        )
        ),
        types.Tool(
            name = "search",
            description = "Search the knowledge graph.",
            inputSchema = {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                },
                "required": ["query"],
            },
        ),
        types.Tool(
            name = "prune",
            description = "Reset the knowledge graph.",
            inputSchema = {
                "type": "object",
                "properties": {
                    "query": {"type": "string"},
                },
            },
        ),
    ]


@server.call_tool()
async def handle_call_tool(
    name: str, arguments: dict | None
    name: str,
    arguments: dict | None
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """
    Handle tool execution requests.
    Tools can modify server state and notify clients of changes.
    """
    if name == "Cognify_and_search":
    if name == "cognify":
        with open(os.devnull, "w") as fnull:
            with redirect_stdout(fnull), redirect_stderr(fnull):
                await cognee.prune.prune_data()
                await cognee.prune.prune_system(metadata=True)

                if not arguments:
                    raise ValueError("Missing arguments")

                text = arguments.get("text")
                search_query = arguments.get("search_query")

                if ("graph_model_file" in arguments) and ("graph_model_name" in arguments):
                    model_file = arguments.get("graph_model_file")
                    model_name = arguments.get("graph_model_name")

                    graph_model = load_class(model_file, model_name)
                else:
                    graph_model = KnowledgeGraph

                await cognee.add(text)
                await cognee.cognify(graph_model=graph_model)

                await cognee.cognify(graph_model = graph_model)

                return [
                    types.TextContent(
                        type = "text",
                        text = "Ingested",
                    )
                ]
    elif name == "search":
        with open(os.devnull, "w") as fnull:
            with redirect_stdout(fnull), redirect_stderr(fnull):
                if not arguments:
                    raise ValueError("Missing arguments")

                search_query = arguments.get("query")

                search_results = await cognee.search(
                    SearchType.INSIGHTS, query_text=search_query
                    SearchType.INSIGHTS, query_text = search_query
                )

                results = retrieved_edges_to_string(search_results)

                return [
                    types.TextContent(
                        type="text",
                        text=results,
                        type = "text",
                        text = results,
                    )
                ]
    elif name == "prune":
        with open(os.devnull, "w") as fnull:
            with redirect_stdout(fnull), redirect_stderr(fnull):
                await cognee.prune.prune_data()
                await cognee.prune.prune_system(metadata=True)

                return [
                    types.TextContent(
                        type = "text",
                        text = "Pruned",
                    )
                ]
    else:

@@ -116,11 +166,15 @@ async def main():
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="mcpcognee",
                server_version="0.1.0",
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                server_name = "cognee-mcp",
                server_version = "0.1.0",
                capabilities = server.get_capabilities(
                    notification_options = NotificationOptions(),
                    experimental_capabilities = {},
                ),
            ),
        )
    )

# This is needed if you'd like to connect to a custom client
if __name__ == "__main__":
    asyncio.run(main())
@@ -1,4 +0,0 @@
from mcpcognee import main
import asyncio

asyncio.run(main())
@@ -1,5 +1,5 @@
[project]
name = "mcpcognee"
name = "cognee-mcp"
version = "0.1.0"
description = "A MCP server project"
readme = "README.md"

@@ -91,4 +91,4 @@ dev = [
]

[project.scripts]
mcpcognee = "mcpcognee:main"
cognee = "cognee_mcp:main"
4764  cognee-mcp/uv.lock (generated, new file)
File diff suppressed because it is too large