Merge branch 'main' into feat/COG-553-graph-memory-projection

This commit is contained in:
hajdul88 2024-11-12 16:56:40 +01:00 committed by GitHub
commit 9b62617394
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 154 additions and 97 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

View file

@ -1,81 +0,0 @@
name: Issue and PR Auto Comments
on:
issues:
types:
- opened
- closed
- assigned
pull_request_target:
types:
- opened
- closed
permissions:
contents: read
jobs:
auto-comment:
permissions:
issues: write
pull-requests: write
runs-on: ubuntu-latest
steps:
# configuration for auto-comment actions
- name: Configure Auto Comments
uses: wow-actions/auto-comment@v1
with:
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
issuesOpened: |
👋 @{{ author }}
Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible.
To help us address your issue efficiently, please ensure you have provided:
- A clear description of the problem
- Steps to reproduce (if applicable)
- Expected vs actual behavior
- Any relevant screenshots or error messages
Our team typically responds within 2-3 business days.
issuesClosed: |
✅ @{{ author }}
This issue has been closed. If you have any further questions or if the issue resurfaces,
please feel free to:
- Add a comment to this thread
- Open a new issue with reference to this one
Thank you for helping us improve!
pullRequestOpened: |
👍 @{{ author }}
Thank you for your pull request and contributing to our community!
Please ensure you have:
- [ ] Followed our contributing guidelines
- [ ] Added/updated tests (if applicable)
- [ ] Updated documentation (if applicable)
- [ ] Added a descriptive PR title
Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance.
# Separate action for merged PRs
- name: Handle Merged Pull Requests
if: github.event.pull_request.merged == true
uses: actions-cool/pr-welcome@v1.4.0
with:
token: ${{ secrets.GH_TOKEN }}
comment: |
🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉
Your pull request has been merged successfully. Thank you for your valuable contribution!
We appreciate the time and effort you've put into improving our project.
Your changes will be included in our next release.
Keep up the great work! 💪
emoji: 'rocket'
pr-emoji: '+1, heart, rocket'

3
.gitignore vendored
View file

@ -4,7 +4,8 @@
.prod.env
cognee/.data/
*.lance/
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

BIN
cognee/.DS_Store vendored Normal file

Binary file not shown.

View file

@ -284,16 +284,10 @@ class NetworkXAdapter(GraphDBInterface):
os.makedirs(file_dir, exist_ok = True)
await self.save_graph_to_file(file_path)
except Exception as e:
logger.error("Failed to load graph from file: %s \n %s", file_path, str(e))
# Initialize an empty graph in case of error
self.graph = nx.MultiDiGraph()
file_dir = os.path.dirname(file_path)
if not os.path.exists(file_dir):
os.makedirs(file_dir, exist_ok = True)
except Exception:
logger.error("Failed to load graph from file: %s", file_path)
await self.save_graph_to_file(file_path)
async def delete_graph(self, file_path: str = None):
"""Asynchronously delete the graph file from the filesystem."""

View file

@ -1,12 +1,17 @@
from cognee.modules.data.models import Data
from cognee.modules.data.processing.document_types import Document, PdfDocument, AudioDocument, ImageDocument, TextDocument
# Maps a data item's file extension to the Document subclass used to wrap it.
# Every extension must be listed exactly once: a repeated key in a dict
# literal silently overrides the earlier entry, so a second "pdf" entry
# pointing at TextDocument would make PdfDocument unreachable.
EXTENSION_TO_DOCUMENT_CLASS = {
    "pdf": PdfDocument,
    "audio": AudioDocument,
    "image": ImageDocument,
    "txt": TextDocument,
}
def classify_documents(data_documents: list[Data]) -> list[Document]:
documents = [
PdfDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else
AudioDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else
ImageDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else
TextDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
for data_item in data_documents
]

View file

@ -18,7 +18,7 @@ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_eng
from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph
from cognee.tasks.infer_data_ontology.models.models import NodeModel, GraphOntology
from cognee.tasks.graph.models import NodeModel, GraphOntology
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.engine.utils import generate_node_id, generate_node_name

View file

@ -28,4 +28,4 @@ class OntologyEdge(BaseModel):
class GraphOntology(BaseModel):
nodes: list[OntologyNode]
edges: list[OntologyEdge]
edges: list[OntologyEdge]

View file

@ -5,7 +5,7 @@ from pydantic import BaseModel
from cognee.modules.data.extraction.extract_summary import extract_summary
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.tasks.storage import add_data_points
from .models.TextSummary import TextSummary
from .models import TextSummary
async def summarize_text(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel]):
if len(data_chunks) == 0:

View file

@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ['GRAPHISTRY_USERNAME'] = input(\"Please enter your graphistry username\")\n",
"os.environ['GRAPHISTRY_PASSWORD'] = input(\"Please enter your graphistry password\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from cognee.modules.users.methods import get_default_user\n",
"\n",
"from cognee.modules.data.methods import get_datasets\n",
"from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
"from cognee.modules.data.models import Data\n",
"\n",
"from cognee.modules.pipelines.tasks.Task import Task\n",
"from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents\n",
"from cognee.tasks.graph import extract_graph_from_code\n",
"from cognee.tasks.storage import add_data_points\n",
"from cognee.shared.SourceCodeGraph import SourceCodeGraph\n",
"\n",
"from cognee.modules.pipelines import run_tasks\n",
"\n",
"from cognee.shared.utils import render_graph\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"user = await get_default_user()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"existing_datasets = await get_datasets(user.id)\n",
"\n",
"datasets = {}\n",
"for dataset in existing_datasets:\n",
" dataset_name = dataset.name.replace(\".\", \"_\").replace(\" \", \"_\")\n",
" data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
" datasets[dataset_name] = data_documents\n",
"print(datasets.keys())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tasks = [\n",
" Task(classify_documents),\n",
" Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
" Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n",
" Task(add_data_points, task_config = { \"batch_size\": 10 }),\n",
" Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks.\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"async def run_codegraph_pipeline(tasks, data_documents):\n",
" pipeline = run_tasks(tasks, data_documents, \"code_graph_pipeline\")\n",
" results = []\n",
" async for result in pipeline:\n",
" results.append(result)\n",
" return(results)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results = await run_codegraph_pipeline(tasks, datasets[\"main_dataset\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"await render_graph(None, include_nodes = True, include_labels = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "cognee",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}