Merge branch 'main' into feat/COG-553-graph-memory-projection

2024-11-12 16:56:40 +01:00 · 2024-11-12 16:56:40 +01:00 · 9b62617394
commit 9b62617394
parent 7363909862 cdaf63f57c
12 changed files with 154 additions and 97 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/.github/workflows/auto-comment.yml
+++ b/.github/workflows/auto-comment.yml
@ -1,81 +0,0 @@
-name: Issue and PR Auto Comments
-on:
-  issues:
-    types:
-      - opened
-      - closed
-      - assigned
-  pull_request_target:
-    types:
-      - opened
-      - closed
-
-permissions:
-  contents: read
-
-jobs:
-  auto-comment:
-    permissions:
-      issues: write
-      pull-requests: write
-    runs-on: ubuntu-latest
-    steps:
-      # configuration for auto-comment actions
-      - name: Configure Auto Comments
-        uses: wow-actions/auto-comment@v1
-        with:
-          GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
-          issuesOpened: |
-            👋 @{{ author }}
-            
-            Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible.
-            
-            To help us address your issue efficiently, please ensure you have provided:
-            - A clear description of the problem
-            - Steps to reproduce (if applicable)
-            - Expected vs actual behavior
-            - Any relevant screenshots or error messages
-            
-            Our team typically responds within 2-3 business days.
-
-          issuesClosed: |
-            ✅ @{{ author }}
-            
-            This issue has been closed. If you have any further questions or if the issue resurfaces, 
-            please feel free to:
-            - Add a comment to this thread
-            - Open a new issue with reference to this one
-            
-            Thank you for helping us improve!
-
-          pullRequestOpened: |
-            👍 @{{ author }}
-            
-            Thank you for your pull request and contributing to our community!
-            
-            Please ensure you have:
-            - [ ] Followed our contributing guidelines
-            - [ ] Added/updated tests (if applicable)
-            - [ ] Updated documentation (if applicable)
-            - [ ] Added a descriptive PR title
-            
-            Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance.
-
-      # Separate action for merged PRs
-      - name: Handle Merged Pull Requests
-        if: github.event.pull_request.merged == true
-        uses: actions-cool/pr-welcome@v1.4.0
-        with:
-          token: ${{ secrets.GH_TOKEN }}
-          comment: |
-            🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉
-            
-            Your pull request has been merged successfully. Thank you for your valuable contribution!
-            
-            We appreciate the time and effort you've put into improving our project.
-            Your changes will be included in our next release.
-            
-            Keep up the great work! 💪
-          emoji: 'rocket'
-          pr-emoji: '+1, heart, rocket'
-          
--- a/.gitignore
+++ b/.gitignore
@ -4,7 +4,8 @@
 .prod.env
 cognee/.data/

-
+*.lance/
+.DS_Store
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
--- a/cognee/.DS_Store
+++ b/cognee/.DS_Store
--- a/cognee/infrastructure/databases/graph/networkx/adapter.py
+++ b/cognee/infrastructure/databases/graph/networkx/adapter.py
@ -284,16 +284,10 @@ class NetworkXAdapter(GraphDBInterface):
                    os.makedirs(file_dir, exist_ok = True)

                await self.save_graph_to_file(file_path)
-        except Exception as e:
-            logger.error("Failed to load graph from file: %s \n %s", file_path, str(e))
-            # Initialize an empty graph in case of error
-            self.graph = nx.MultiDiGraph()

-            file_dir = os.path.dirname(file_path)
-            if not os.path.exists(file_dir):
-                os.makedirs(file_dir, exist_ok = True)
+        except Exception:
+            logger.error("Failed to load graph from file: %s", file_path, exc_info = True)  # keep the traceback so the cause is not lost

-            await self.save_graph_to_file(file_path)

    async def delete_graph(self, file_path: str = None):
        """Asynchronously delete the graph file from the filesystem."""
--- a/cognee/tasks/documents/classify_documents.py
+++ b/cognee/tasks/documents/classify_documents.py
@ -1,12 +1,17 @@
 from cognee.modules.data.models import Data
 from cognee.modules.data.processing.document_types import Document, PdfDocument, AudioDocument, ImageDocument, TextDocument

+# Maps a data item's extension to the Document class that wraps it; a repeated key would silently override the earlier entry.
+EXTENSION_TO_DOCUMENT_CLASS = {
+    "pdf": PdfDocument,
+    "audio": AudioDocument,
+    "image": ImageDocument,
+    "txt": TextDocument
+}
+
 def classify_documents(data_documents: list[Data]) -> list[Document]:
    documents = [
-        PdfDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else
-        AudioDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else
-        ImageDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else
-        TextDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
+        EXTENSION_TO_DOCUMENT_CLASS.get(data_item.extension, TextDocument)(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)  # unmapped extensions fall back to TextDocument instead of raising KeyError
        for data_item in data_documents
    ]

--- a/cognee/tasks/graph/infer_data_ontology.py
+++ b/cognee/tasks/graph/infer_data_ontology.py
@ -18,7 +18,7 @@ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_eng
 from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
 from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
 from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph
-from cognee.tasks.infer_data_ontology.models.models import NodeModel, GraphOntology
+from cognee.tasks.graph.models import NodeModel, GraphOntology
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.modules.engine.utils import generate_node_id, generate_node_name

--- a/cognee/tasks/infer_data_ontology/models/models.py
+++ b/cognee/tasks/infer_data_ontology/models/models.py
@ -28,4 +28,4 @@ class OntologyEdge(BaseModel):

 class GraphOntology(BaseModel):
    nodes: list[OntologyNode]
-    edges: list[OntologyEdge]
+    edges: list[OntologyEdge]
--- a/cognee/tasks/search_evaluate/init.py
+++ b/cognee/tasks/search_evaluate/init.py
--- a/cognee/tasks/summarization/models/TextSummary.py
+++ b/cognee/tasks/summarization/models/TextSummary.py
--- a/cognee/tasks/summarization/summarize_text.py
+++ b/cognee/tasks/summarization/summarize_text.py
@ -5,7 +5,7 @@ from pydantic import BaseModel
 from cognee.modules.data.extraction.extract_summary import extract_summary
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
 from cognee.tasks.storage import add_data_points
-from .models.TextSummary import TextSummary
+from .models import TextSummary

 async def summarize_text(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel]):
    if len(data_chunks) == 0:
--- a/notebooks/cognee_code_graph_demo.ipynb
+++ b/notebooks/cognee_code_graph_demo.ipynb
@ -0,0 +1,138 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import getpass, os\n",
+    "os.environ['GRAPHISTRY_USERNAME'] = input(\"Please enter your graphistry username\")\n",
+    "os.environ['GRAPHISTRY_PASSWORD'] = getpass.getpass(\"Please enter your graphistry password\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cognee.modules.users.methods import get_default_user\n",
+    "\n",
+    "from cognee.modules.data.methods import get_datasets\n",
+    "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
+    "from cognee.modules.data.models import Data\n",
+    "\n",
+    "from cognee.modules.pipelines.tasks.Task import Task\n",
+    "from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents\n",
+    "from cognee.tasks.graph import extract_graph_from_code\n",
+    "from cognee.tasks.storage import add_data_points\n",
+    "from cognee.shared.SourceCodeGraph import SourceCodeGraph\n",
+    "\n",
+    "from cognee.modules.pipelines import run_tasks\n",
+    "\n",
+    "from cognee.shared.utils import render_graph\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "user = await get_default_user()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "existing_datasets = await get_datasets(user.id)\n",
+    "\n",
+    "datasets = {}\n",
+    "for dataset in existing_datasets:\n",
+    "    dataset_name = dataset.name.replace(\".\", \"_\").replace(\" \", \"_\")\n",
+    "    data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
+    "    datasets[dataset_name] = data_documents\n",
+    "print(datasets.keys())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tasks = [\n",
+    "    Task(classify_documents),\n",
+    "    Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
+    "    Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n",
+    "    Task(add_data_points, task_config = { \"batch_size\": 10 }),\n",
+    "    Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks.\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "async def run_codegraph_pipeline(tasks, data_documents):\n",
+    "    pipeline = run_tasks(tasks, data_documents, \"code_graph_pipeline\")\n",
+    "    results = []\n",
+    "    async for result in pipeline:\n",
+    "        results.append(result)\n",
+    "    return(results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results = await run_codegraph_pipeline(tasks, datasets[\"main_dataset\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "await render_graph(None, include_nodes = True, include_labels = True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "cognee",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}