Merge remote-tracking branch 'origin/dev' into feat/modal-parallelization

2025-04-23 12:23:04 +02:00 · 2025-04-23 12:23:04 +02:00 · 631f816323
commit 631f816323
parent 5692ef096e f404386df5
12 changed files with 1971 additions and 1584 deletions
--- a/2
+++ b/2
@@ -5,7 +5,7 @@ ARG POETRY_EXTRAS="\
 # API \
 api \
 # Storage & Databases \
-filesystem postgres weaviate qdrant neo4j falkordb milvus kuzu chromadb \
+postgres weaviate qdrant neo4j falkordb milvus kuzu chromadb \
 # Notebooks & Interactive Environments \
 notebook \
 # LLM & AI Frameworks \
--- a/cognee-mcp/pyproject.toml
+++ b/cognee-mcp/pyproject.toml
@@ -1,19 +1,19 @@
 [project]
 name = "cognee-mcp"
-version = "0.2.2"
+version = "0.2.3"
 description = "A MCP server project"
 readme = "README.md"
 requires-python = ">=3.10"

 dependencies = [
-    "cognee[postgres,codegraph,gemini,huggingface]==0.1.37",
+    "cognee[postgres,codegraph,gemini,huggingface]==0.1.38",
    "mcp==1.5.0",
    "uv>=0.6.3",
 ]

 [[project.authors]]
-name = "Rita Aleksziev"
-email = "rita@topoteretes.com"
+name = "Boris Arzentar"
+email = "boris@topoteretes.com"

 [build-system]
 requires = [ "hatchling", ]
--- a/cognee-mcp/uv.lock
+++ b/cognee-mcp/uv.lock
--- a/cognee/infrastructure/databases/graph/graph_db_interface.py
+++ b/cognee/infrastructure/databases/graph/graph_db_interface.py
@@ -21,10 +21,10 @@ Node = Tuple[str, NodeData]  # (node_id, properties)

 def record_graph_changes(func):
    """Decorator to record graph changes in the relationship database."""
-    db_engine = get_relational_engine()

    @wraps(func)
    async def wrapper(self, *args, **kwargs):
+        db_engine = get_relational_engine()
        frame = inspect.currentframe()
        while frame:
            if frame.f_back and frame.f_back.f_code.co_name != "wrapper":
--- a/cognee/modules/data/extraction/knowledge_graph/extract_content_graph.py
+++ b/cognee/modules/data/extraction/knowledge_graph/extract_content_graph.py
@@ -1,4 +1,5 @@
-from typing import Type, Optional
+import os
+from typing import Type
 from pydantic import BaseModel
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.infrastructure.llm.prompts import render_prompt
@@ -10,7 +11,18 @@ async def extract_content_graph(content: str, response_model: Type[BaseModel]):
    llm_config = get_llm_config()

    prompt_path = llm_config.graph_prompt_path
-    system_prompt = render_prompt(prompt_path, {})
+
+    # Check if the prompt path is an absolute path or just a filename
+    if os.path.isabs(prompt_path):
+        # directory containing the file
+        base_directory = os.path.dirname(prompt_path)
+        # just the filename itself
+        prompt_path = os.path.basename(prompt_path)
+    else:
+        base_directory = None
+
+    system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
+
    content_graph = await llm_client.acreate_structured_output(
        content, system_prompt, response_model
    )
--- a/cognee/modules/retrieval/code_retriever.py
+++ b/cognee/modules/retrieval/code_retriever.py
@@ -19,9 +19,9 @@ class CodeRetriever(BaseRetriever):
        filenames: List[str] = []
        sourcecode: str

-    def __init__(self, limit: int = 3):
+    def __init__(self, top_k: int = 3):
        """Initialize retriever with search parameters."""
-        self.limit = limit
+        self.top_k = top_k
        self.file_name_collections = ["CodeFile_name"]
        self.classes_and_functions_collections = [
            "ClassDefinition_source_code",
@@ -60,7 +60,7 @@ class CodeRetriever(BaseRetriever):
        if not files_and_codeparts.filenames or not files_and_codeparts.sourcecode:
            for collection in self.file_name_collections:
                search_results_file = await vector_engine.search(
-                    collection, query, limit=self.limit
+                    collection, query, limit=self.top_k
                )
                for res in search_results_file:
                    similar_filenames.append(
@@ -69,7 +69,7 @@ class CodeRetriever(BaseRetriever):

            for collection in self.classes_and_functions_collections:
                search_results_code = await vector_engine.search(
-                    collection, query, limit=self.limit
+                    collection, query, limit=self.top_k
                )
                for res in search_results_code:
                    similar_codepieces.append(
@@ -79,7 +79,7 @@ class CodeRetriever(BaseRetriever):
            for collection in self.file_name_collections:
                for file_from_query in files_and_codeparts.filenames:
                    search_results_file = await vector_engine.search(
-                        collection, file_from_query, limit=self.limit
+                        collection, file_from_query, limit=self.top_k
                    )
                    for res in search_results_file:
                        similar_filenames.append(
@@ -88,7 +88,7 @@ class CodeRetriever(BaseRetriever):

            for collection in self.classes_and_functions_collections:
                search_results_code = await vector_engine.search(
-                    collection, files_and_codeparts.sourcecode, limit=self.limit
+                    collection, files_and_codeparts.sourcecode, limit=self.top_k
                )
                for res in search_results_code:
                    similar_codepieces.append(
--- a/cognee/modules/retrieval/summaries_retriever.py
+++ b/cognee/modules/retrieval/summaries_retriever.py
@@ -9,9 +9,9 @@ from cognee.infrastructure.databases.vector.exceptions.exceptions import Collect
 class SummariesRetriever(BaseRetriever):
    """Retriever for handling summary-based searches."""

-    def __init__(self, limit: int = 5):
+    def __init__(self, top_k: int = 5):
        """Initialize retriever with search parameters."""
-        self.limit = limit
+        self.top_k = top_k

    async def get_context(self, query: str) -> Any:
        """Retrieves summary context based on the query."""
@@ -19,7 +19,7 @@ class SummariesRetriever(BaseRetriever):

        try:
            summaries_results = await vector_engine.search(
-                "TextSummary_text", query, limit=self.limit
+                "TextSummary_text", query, limit=self.top_k
            )
        except CollectionNotFoundError as error:
            raise NoDataError("No data found in the system, please add data first.") from error
--- a/cognee/modules/search/methods/search.py
+++ b/cognee/modules/search/methods/search.py
@@ -59,9 +59,9 @@ async def specific_search(
    top_k: int = 10,
 ) -> list:
    search_tasks: dict[SearchType, Callable] = {
-        SearchType.SUMMARIES: SummariesRetriever().get_completion,
+        SearchType.SUMMARIES: SummariesRetriever(top_k=top_k).get_completion,
        SearchType.INSIGHTS: InsightsRetriever(top_k=top_k).get_completion,
-        SearchType.CHUNKS: ChunksRetriever().get_completion,
+        SearchType.CHUNKS: ChunksRetriever(top_k=top_k).get_completion,
        SearchType.RAG_COMPLETION: CompletionRetriever(
            system_prompt_path=system_prompt_path,
            top_k=top_k,
@@ -71,9 +71,9 @@ async def specific_search(
            top_k=top_k,
        ).get_completion,
        SearchType.GRAPH_SUMMARY_COMPLETION: GraphSummaryCompletionRetriever(
-            system_prompt_path=system_prompt_path,
+            system_prompt_path=system_prompt_path, top_k=top_k
        ).get_completion,
-        SearchType.CODE: CodeRetriever().get_completion,
+        SearchType.CODE: CodeRetriever(top_k=top_k).get_completion,
        SearchType.CYPHER: CypherSearchRetriever().get_completion,
        SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion,
    }
--- a/cognee/tasks/graph/extract_graph_from_data.py
+++ b/cognee/tasks/graph/extract_graph_from_data.py
@@ -62,6 +62,16 @@ async def extract_graph_from_data(
        *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
    )

+    # Note: Filter edges with missing source or target nodes
+    if graph_model == KnowledgeGraph:
+        for graph in chunk_graphs:
+            valid_node_ids = {node.id for node in graph.nodes}
+            graph.edges = [
+                edge
+                for edge in graph.edges
+                if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
+            ]
+
    return await integrate_chunk_graphs(
        data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
    )
--- a/cognee/tests/test_deletion.py
+++ b/cognee/tests/test_deletion.py
@@ -57,7 +57,7 @@ async def main():

    graph_engine = await get_graph_engine()
    nodes, edges = await graph_engine.get_graph_data()
-    assert len(nodes) > 15 and len(edges) > 15, "Graph database is not loaded."
+    assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."

    await cognee.delete([text_1, text_2], mode="hard")
    nodes, edges = await graph_engine.get_graph_data()
--- a/helm/Dockerfile
+++ b/helm/Dockerfile
@@ -3,7 +3,7 @@ FROM python:3.11-slim
 # Define Poetry extras to install
 ARG POETRY_EXTRAS="\
 # Storage & Databases \
-filesystem postgres weaviate qdrant neo4j falkordb milvus kuzu \
+postgres weaviate qdrant neo4j falkordb milvus kuzu \
 # Notebooks & Interactive Environments \
 notebook \
 # LLM & AI Frameworks \
--- a/poetry.lock
+++ b/poetry.lock