diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 000000000..c53aac86a Binary files /dev/null and b/.DS_Store differ diff --git a/.github/workflows/auto-comment.yml b/.github/workflows/auto-comment.yml deleted file mode 100644 index f38948f94..000000000 --- a/.github/workflows/auto-comment.yml +++ /dev/null @@ -1,81 +0,0 @@ -name: Issue and PR Auto Comments -on: - issues: - types: - - opened - - closed - - assigned - pull_request_target: - types: - - opened - - closed - -permissions: - contents: read - -jobs: - auto-comment: - permissions: - issues: write - pull-requests: write - runs-on: ubuntu-latest - steps: - # configuration for auto-comment actions - - name: Configure Auto Comments - uses: wow-actions/auto-comment@v1 - with: - GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} - issuesOpened: | - 👋 @{{ author }} - - Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible. - - To help us address your issue efficiently, please ensure you have provided: - - A clear description of the problem - - Steps to reproduce (if applicable) - - Expected vs actual behavior - - Any relevant screenshots or error messages - - Our team typically responds within 2-3 business days. - - issuesClosed: | - ✅ @{{ author }} - - This issue has been closed. If you have any further questions or if the issue resurfaces, - please feel free to: - - Add a comment to this thread - - Open a new issue with reference to this one - - Thank you for helping us improve! - - pullRequestOpened: | - 👍 @{{ author }} - - Thank you for your pull request and contributing to our community! - - Please ensure you have: - - [ ] Followed our contributing guidelines - - [ ] Added/updated tests (if applicable) - - [ ] Updated documentation (if applicable) - - [ ] Added a descriptive PR title - - Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance. 
- - # Separate action for merged PRs - - name: Handle Merged Pull Requests - if: github.event.pull_request.merged == true - uses: actions-cool/pr-welcome@v1.4.0 - with: - token: ${{ secrets.GH_TOKEN }} - comment: | - 🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉 - - Your pull request has been merged successfully. Thank you for your valuable contribution! - - We appreciate the time and effort you've put into improving our project. - Your changes will be included in our next release. - - Keep up the great work! 💪 - emoji: 'rocket' - pr-emoji: '+1, heart, rocket' - diff --git a/.gitignore b/.gitignore index f447655cf..d256013d2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,8 @@ .prod.env cognee/.data/ - +*.lance/ +.DS_Store # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/cognee/.DS_Store b/cognee/.DS_Store new file mode 100644 index 000000000..51227d71a Binary files /dev/null and b/cognee/.DS_Store differ diff --git a/cognee/infrastructure/databases/graph/networkx/adapter.py b/cognee/infrastructure/databases/graph/networkx/adapter.py index 65aeea289..a72376082 100644 --- a/cognee/infrastructure/databases/graph/networkx/adapter.py +++ b/cognee/infrastructure/databases/graph/networkx/adapter.py @@ -284,16 +284,10 @@ class NetworkXAdapter(GraphDBInterface): os.makedirs(file_dir, exist_ok = True) await self.save_graph_to_file(file_path) - except Exception as e: - logger.error("Failed to load graph from file: %s \n %s", file_path, str(e)) - # Initialize an empty graph in case of error - self.graph = nx.MultiDiGraph() - file_dir = os.path.dirname(file_path) - if not os.path.exists(file_dir): - os.makedirs(file_dir, exist_ok = True) + except Exception: + logger.error("Failed to load graph from file: %s", file_path) - await self.save_graph_to_file(file_path) async def delete_graph(self, file_path: str = None): """Asynchronously delete the graph file from the filesystem.""" diff --git 
a/cognee/tasks/documents/classify_documents.py b/cognee/tasks/documents/classify_documents.py index 64ed808d6..e83fe8917 100644 --- a/cognee/tasks/documents/classify_documents.py +++ b/cognee/tasks/documents/classify_documents.py @@ -1,12 +1,17 @@ from cognee.modules.data.models import Data from cognee.modules.data.processing.document_types import Document, PdfDocument, AudioDocument, ImageDocument, TextDocument +EXTENSION_TO_DOCUMENT_CLASS = { + "pdf": PdfDocument, + "audio": AudioDocument, + "image": ImageDocument, + "text": TextDocument, + "txt": TextDocument +} + def classify_documents(data_documents: list[Data]) -> list[Document]: documents = [ - PdfDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else - AudioDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else - ImageDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else - TextDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) + EXTENSION_TO_DOCUMENT_CLASS.get(data_item.extension, TextDocument)(id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) for data_item in data_documents ] diff --git a/cognee/tasks/graph/infer_data_ontology.py b/cognee/tasks/graph/infer_data_ontology.py index e1a710fa2..eea378eb1 100644 --- a/cognee/tasks/graph/infer_data_ontology.py +++ b/cognee/tasks/graph/infer_data_ontology.py @@ -18,7 +18,7 @@ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_eng from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file from cognee.infrastructure.files.utils.guess_file_type import guess_file_type,
FileTypeException from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph -from cognee.tasks.infer_data_ontology.models.models import NodeModel, GraphOntology +from cognee.tasks.graph.models import NodeModel, GraphOntology from cognee.shared.data_models import KnowledgeGraph from cognee.modules.engine.utils import generate_node_id, generate_node_name diff --git a/cognee/tasks/infer_data_ontology/models/models.py b/cognee/tasks/graph/models.py similarity index 96% rename from cognee/tasks/infer_data_ontology/models/models.py rename to cognee/tasks/graph/models.py index 9c086b5c7..5b1108e6a 100644 --- a/cognee/tasks/infer_data_ontology/models/models.py +++ b/cognee/tasks/graph/models.py @@ -28,4 +28,4 @@ class OntologyEdge(BaseModel): class GraphOntology(BaseModel): nodes: list[OntologyNode] - edges: list[OntologyEdge] + edges: list[OntologyEdge] \ No newline at end of file diff --git a/cognee/tasks/search_evaluate/__init__.py b/cognee/tasks/search_evaluate/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cognee/tasks/summarization/models/TextSummary.py b/cognee/tasks/summarization/models.py similarity index 100% rename from cognee/tasks/summarization/models/TextSummary.py rename to cognee/tasks/summarization/models.py diff --git a/cognee/tasks/summarization/summarize_text.py b/cognee/tasks/summarization/summarize_text.py index 47d6946bb..2dab3b0f8 100644 --- a/cognee/tasks/summarization/summarize_text.py +++ b/cognee/tasks/summarization/summarize_text.py @@ -5,7 +5,7 @@ from pydantic import BaseModel from cognee.modules.data.extraction.extract_summary import extract_summary from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.tasks.storage import add_data_points -from .models.TextSummary import TextSummary +from .models import TextSummary async def summarize_text(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel]): if len(data_chunks) == 
0: diff --git a/notebooks/cognee_code_graph_demo.ipynb b/notebooks/cognee_code_graph_demo.ipynb new file mode 100644 index 000000000..5e21e9dad --- /dev/null +++ b/notebooks/cognee_code_graph_demo.ipynb @@ -0,0 +1,138 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['GRAPHISTRY_USERNAME'] = input(\"Please enter your graphistry username\")\n", + "os.environ['GRAPHISTRY_PASSWORD'] = input(\"Please enter your graphistry password\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from cognee.modules.users.methods import get_default_user\n", + "\n", + "from cognee.modules.data.methods import get_datasets\n", + "from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n", + "from cognee.modules.data.models import Data\n", + "\n", + "from cognee.modules.pipelines.tasks.Task import Task\n", + "from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents\n", + "from cognee.tasks.graph import extract_graph_from_code\n", + "from cognee.tasks.storage import add_data_points\n", + "from cognee.shared.SourceCodeGraph import SourceCodeGraph\n", + "\n", + "from cognee.modules.pipelines import run_tasks\n", + "\n", + "from cognee.shared.utils import render_graph\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "user = await get_default_user()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "existing_datasets = await get_datasets(user.id)\n", + "\n", + "datasets = {}\n", + "for dataset in existing_datasets:\n", + " dataset_name = dataset.name.replace(\".\", \"_\").replace(\" \", \"_\")\n", + " data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n", + " datasets[dataset_name] = 
data_documents\n", + "print(datasets.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tasks = [\n", + " Task(classify_documents),\n", + " Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n", + " Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n", + " Task(add_data_points, task_config = { \"batch_size\": 10 }),\n", + " Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks.\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def run_codegraph_pipeline(tasks, data_documents):\n", + " pipeline = run_tasks(tasks, data_documents, \"code_graph_pipeline\")\n", + " results = []\n", + " async for result in pipeline:\n", + " results.append(result)\n", + " return(results)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = await run_codegraph_pipeline(tasks, datasets[\"main_dataset\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "await render_graph(None, include_nodes = True, include_labels = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cognee", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}