Merge remote-tracking branch 'origin/dev' into feat/modal-parallelization
This commit is contained in:
commit
631f816323
12 changed files with 1971 additions and 1584 deletions
|
|
@ -5,7 +5,7 @@ ARG POETRY_EXTRAS="\
|
||||||
# API \
|
# API \
|
||||||
api \
|
api \
|
||||||
# Storage & Databases \
|
# Storage & Databases \
|
||||||
filesystem postgres weaviate qdrant neo4j falkordb milvus kuzu chromadb \
|
postgres weaviate qdrant neo4j falkordb milvus kuzu chromadb \
|
||||||
# Notebooks & Interactive Environments \
|
# Notebooks & Interactive Environments \
|
||||||
notebook \
|
notebook \
|
||||||
# LLM & AI Frameworks \
|
# LLM & AI Frameworks \
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,19 @@
|
||||||
[project]
|
[project]
|
||||||
name = "cognee-mcp"
|
name = "cognee-mcp"
|
||||||
version = "0.2.2"
|
version = "0.2.3"
|
||||||
description = "A MCP server project"
|
description = "A MCP server project"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cognee[postgres,codegraph,gemini,huggingface]==0.1.37",
|
"cognee[postgres,codegraph,gemini,huggingface]==0.1.38",
|
||||||
"mcp==1.5.0",
|
"mcp==1.5.0",
|
||||||
"uv>=0.6.3",
|
"uv>=0.6.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[project.authors]]
|
[[project.authors]]
|
||||||
name = "Rita Aleksziev"
|
name = "Boris Arzentar"
|
||||||
email = "rita@topoteretes.com"
|
email = "boris@topoteretes.com"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = [ "hatchling", ]
|
requires = [ "hatchling", ]
|
||||||
|
|
|
||||||
1819
cognee-mcp/uv.lock
generated
1819
cognee-mcp/uv.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -21,10 +21,10 @@ Node = Tuple[str, NodeData] # (node_id, properties)
|
||||||
|
|
||||||
def record_graph_changes(func):
|
def record_graph_changes(func):
|
||||||
"""Decorator to record graph changes in the relationship database."""
|
"""Decorator to record graph changes in the relationship database."""
|
||||||
db_engine = get_relational_engine()
|
|
||||||
|
|
||||||
@wraps(func)
|
@wraps(func)
|
||||||
async def wrapper(self, *args, **kwargs):
|
async def wrapper(self, *args, **kwargs):
|
||||||
|
db_engine = get_relational_engine()
|
||||||
frame = inspect.currentframe()
|
frame = inspect.currentframe()
|
||||||
while frame:
|
while frame:
|
||||||
if frame.f_back and frame.f_back.f_code.co_name != "wrapper":
|
if frame.f_back and frame.f_back.f_code.co_name != "wrapper":
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
from typing import Type, Optional
|
import os
|
||||||
|
from typing import Type
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
from cognee.infrastructure.llm.prompts import render_prompt
|
from cognee.infrastructure.llm.prompts import render_prompt
|
||||||
|
|
@ -10,7 +11,18 @@ async def extract_content_graph(content: str, response_model: Type[BaseModel]):
|
||||||
llm_config = get_llm_config()
|
llm_config = get_llm_config()
|
||||||
|
|
||||||
prompt_path = llm_config.graph_prompt_path
|
prompt_path = llm_config.graph_prompt_path
|
||||||
system_prompt = render_prompt(prompt_path, {})
|
|
||||||
|
# Check if the prompt path is an absolute path or just a filename
|
||||||
|
if os.path.isabs(prompt_path):
|
||||||
|
# directory containing the file
|
||||||
|
base_directory = os.path.dirname(prompt_path)
|
||||||
|
# just the filename itself
|
||||||
|
prompt_path = os.path.basename(prompt_path)
|
||||||
|
else:
|
||||||
|
base_directory = None
|
||||||
|
|
||||||
|
system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
|
||||||
|
|
||||||
content_graph = await llm_client.acreate_structured_output(
|
content_graph = await llm_client.acreate_structured_output(
|
||||||
content, system_prompt, response_model
|
content, system_prompt, response_model
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,9 @@ class CodeRetriever(BaseRetriever):
|
||||||
filenames: List[str] = []
|
filenames: List[str] = []
|
||||||
sourcecode: str
|
sourcecode: str
|
||||||
|
|
||||||
def __init__(self, limit: int = 3):
|
def __init__(self, top_k: int = 3):
|
||||||
"""Initialize retriever with search parameters."""
|
"""Initialize retriever with search parameters."""
|
||||||
self.limit = limit
|
self.top_k = top_k
|
||||||
self.file_name_collections = ["CodeFile_name"]
|
self.file_name_collections = ["CodeFile_name"]
|
||||||
self.classes_and_functions_collections = [
|
self.classes_and_functions_collections = [
|
||||||
"ClassDefinition_source_code",
|
"ClassDefinition_source_code",
|
||||||
|
|
@ -60,7 +60,7 @@ class CodeRetriever(BaseRetriever):
|
||||||
if not files_and_codeparts.filenames or not files_and_codeparts.sourcecode:
|
if not files_and_codeparts.filenames or not files_and_codeparts.sourcecode:
|
||||||
for collection in self.file_name_collections:
|
for collection in self.file_name_collections:
|
||||||
search_results_file = await vector_engine.search(
|
search_results_file = await vector_engine.search(
|
||||||
collection, query, limit=self.limit
|
collection, query, limit=self.top_k
|
||||||
)
|
)
|
||||||
for res in search_results_file:
|
for res in search_results_file:
|
||||||
similar_filenames.append(
|
similar_filenames.append(
|
||||||
|
|
@ -69,7 +69,7 @@ class CodeRetriever(BaseRetriever):
|
||||||
|
|
||||||
for collection in self.classes_and_functions_collections:
|
for collection in self.classes_and_functions_collections:
|
||||||
search_results_code = await vector_engine.search(
|
search_results_code = await vector_engine.search(
|
||||||
collection, query, limit=self.limit
|
collection, query, limit=self.top_k
|
||||||
)
|
)
|
||||||
for res in search_results_code:
|
for res in search_results_code:
|
||||||
similar_codepieces.append(
|
similar_codepieces.append(
|
||||||
|
|
@ -79,7 +79,7 @@ class CodeRetriever(BaseRetriever):
|
||||||
for collection in self.file_name_collections:
|
for collection in self.file_name_collections:
|
||||||
for file_from_query in files_and_codeparts.filenames:
|
for file_from_query in files_and_codeparts.filenames:
|
||||||
search_results_file = await vector_engine.search(
|
search_results_file = await vector_engine.search(
|
||||||
collection, file_from_query, limit=self.limit
|
collection, file_from_query, limit=self.top_k
|
||||||
)
|
)
|
||||||
for res in search_results_file:
|
for res in search_results_file:
|
||||||
similar_filenames.append(
|
similar_filenames.append(
|
||||||
|
|
@ -88,7 +88,7 @@ class CodeRetriever(BaseRetriever):
|
||||||
|
|
||||||
for collection in self.classes_and_functions_collections:
|
for collection in self.classes_and_functions_collections:
|
||||||
search_results_code = await vector_engine.search(
|
search_results_code = await vector_engine.search(
|
||||||
collection, files_and_codeparts.sourcecode, limit=self.limit
|
collection, files_and_codeparts.sourcecode, limit=self.top_k
|
||||||
)
|
)
|
||||||
for res in search_results_code:
|
for res in search_results_code:
|
||||||
similar_codepieces.append(
|
similar_codepieces.append(
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,9 @@ from cognee.infrastructure.databases.vector.exceptions.exceptions import Collect
|
||||||
class SummariesRetriever(BaseRetriever):
|
class SummariesRetriever(BaseRetriever):
|
||||||
"""Retriever for handling summary-based searches."""
|
"""Retriever for handling summary-based searches."""
|
||||||
|
|
||||||
def __init__(self, limit: int = 5):
|
def __init__(self, top_k: int = 5):
|
||||||
"""Initialize retriever with search parameters."""
|
"""Initialize retriever with search parameters."""
|
||||||
self.limit = limit
|
self.top_k = top_k
|
||||||
|
|
||||||
async def get_context(self, query: str) -> Any:
|
async def get_context(self, query: str) -> Any:
|
||||||
"""Retrieves summary context based on the query."""
|
"""Retrieves summary context based on the query."""
|
||||||
|
|
@ -19,7 +19,7 @@ class SummariesRetriever(BaseRetriever):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
summaries_results = await vector_engine.search(
|
summaries_results = await vector_engine.search(
|
||||||
"TextSummary_text", query, limit=self.limit
|
"TextSummary_text", query, limit=self.top_k
|
||||||
)
|
)
|
||||||
except CollectionNotFoundError as error:
|
except CollectionNotFoundError as error:
|
||||||
raise NoDataError("No data found in the system, please add data first.") from error
|
raise NoDataError("No data found in the system, please add data first.") from error
|
||||||
|
|
|
||||||
|
|
@ -59,9 +59,9 @@ async def specific_search(
|
||||||
top_k: int = 10,
|
top_k: int = 10,
|
||||||
) -> list:
|
) -> list:
|
||||||
search_tasks: dict[SearchType, Callable] = {
|
search_tasks: dict[SearchType, Callable] = {
|
||||||
SearchType.SUMMARIES: SummariesRetriever().get_completion,
|
SearchType.SUMMARIES: SummariesRetriever(top_k=top_k).get_completion,
|
||||||
SearchType.INSIGHTS: InsightsRetriever(top_k=top_k).get_completion,
|
SearchType.INSIGHTS: InsightsRetriever(top_k=top_k).get_completion,
|
||||||
SearchType.CHUNKS: ChunksRetriever().get_completion,
|
SearchType.CHUNKS: ChunksRetriever(top_k=top_k).get_completion,
|
||||||
SearchType.RAG_COMPLETION: CompletionRetriever(
|
SearchType.RAG_COMPLETION: CompletionRetriever(
|
||||||
system_prompt_path=system_prompt_path,
|
system_prompt_path=system_prompt_path,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
|
|
@ -71,9 +71,9 @@ async def specific_search(
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
).get_completion,
|
).get_completion,
|
||||||
SearchType.GRAPH_SUMMARY_COMPLETION: GraphSummaryCompletionRetriever(
|
SearchType.GRAPH_SUMMARY_COMPLETION: GraphSummaryCompletionRetriever(
|
||||||
system_prompt_path=system_prompt_path,
|
system_prompt_path=system_prompt_path, top_k=top_k
|
||||||
).get_completion,
|
).get_completion,
|
||||||
SearchType.CODE: CodeRetriever().get_completion,
|
SearchType.CODE: CodeRetriever(top_k=top_k).get_completion,
|
||||||
SearchType.CYPHER: CypherSearchRetriever().get_completion,
|
SearchType.CYPHER: CypherSearchRetriever().get_completion,
|
||||||
SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion,
|
SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,16 @@ async def extract_graph_from_data(
|
||||||
*[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
|
*[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Note: Filter edges with missing source or target nodes
|
||||||
|
if graph_model == KnowledgeGraph:
|
||||||
|
for graph in chunk_graphs:
|
||||||
|
valid_node_ids = {node.id for node in graph.nodes}
|
||||||
|
graph.edges = [
|
||||||
|
edge
|
||||||
|
for edge in graph.edges
|
||||||
|
if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
|
||||||
|
]
|
||||||
|
|
||||||
return await integrate_chunk_graphs(
|
return await integrate_chunk_graphs(
|
||||||
data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
|
data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ async def main():
|
||||||
|
|
||||||
graph_engine = await get_graph_engine()
|
graph_engine = await get_graph_engine()
|
||||||
nodes, edges = await graph_engine.get_graph_data()
|
nodes, edges = await graph_engine.get_graph_data()
|
||||||
assert len(nodes) > 15 and len(edges) > 15, "Graph database is not loaded."
|
assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."
|
||||||
|
|
||||||
await cognee.delete([text_1, text_2], mode="hard")
|
await cognee.delete([text_1, text_2], mode="hard")
|
||||||
nodes, edges = await graph_engine.get_graph_data()
|
nodes, edges = await graph_engine.get_graph_data()
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ FROM python:3.11-slim
|
||||||
# Define Poetry extras to install
|
# Define Poetry extras to install
|
||||||
ARG POETRY_EXTRAS="\
|
ARG POETRY_EXTRAS="\
|
||||||
# Storage & Databases \
|
# Storage & Databases \
|
||||||
filesystem postgres weaviate qdrant neo4j falkordb milvus kuzu \
|
postgres weaviate qdrant neo4j falkordb milvus kuzu \
|
||||||
# Notebooks & Interactive Environments \
|
# Notebooks & Interactive Environments \
|
||||||
notebook \
|
notebook \
|
||||||
# LLM & AI Frameworks \
|
# LLM & AI Frameworks \
|
||||||
|
|
|
||||||
1668
poetry.lock
generated
1668
poetry.lock
generated
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue