Merge branch 'main' into feat/COG-553-graph-memory-projection
This commit is contained in:
commit
9b62617394
12 changed files with 154 additions and 97 deletions
BIN
.DS_Store
vendored
Normal file
BIN
.DS_Store
vendored
Normal file
Binary file not shown.
81
.github/workflows/auto-comment.yml
vendored
81
.github/workflows/auto-comment.yml
vendored
|
|
@ -1,81 +0,0 @@
|
||||||
name: Issue and PR Auto Comments
|
|
||||||
on:
|
|
||||||
issues:
|
|
||||||
types:
|
|
||||||
- opened
|
|
||||||
- closed
|
|
||||||
- assigned
|
|
||||||
pull_request_target:
|
|
||||||
types:
|
|
||||||
- opened
|
|
||||||
- closed
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
auto-comment:
|
|
||||||
permissions:
|
|
||||||
issues: write
|
|
||||||
pull-requests: write
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
# configuration for auto-comment actions
|
|
||||||
- name: Configure Auto Comments
|
|
||||||
uses: wow-actions/auto-comment@v1
|
|
||||||
with:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
|
|
||||||
issuesOpened: |
|
|
||||||
👋 @{{ author }}
|
|
||||||
|
|
||||||
Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible.
|
|
||||||
|
|
||||||
To help us address your issue efficiently, please ensure you have provided:
|
|
||||||
- A clear description of the problem
|
|
||||||
- Steps to reproduce (if applicable)
|
|
||||||
- Expected vs actual behavior
|
|
||||||
- Any relevant screenshots or error messages
|
|
||||||
|
|
||||||
Our team typically responds within 2-3 business days.
|
|
||||||
|
|
||||||
issuesClosed: |
|
|
||||||
✅ @{{ author }}
|
|
||||||
|
|
||||||
This issue has been closed. If you have any further questions or if the issue resurfaces,
|
|
||||||
please feel free to:
|
|
||||||
- Add a comment to this thread
|
|
||||||
- Open a new issue with reference to this one
|
|
||||||
|
|
||||||
Thank you for helping us improve!
|
|
||||||
|
|
||||||
pullRequestOpened: |
|
|
||||||
👍 @{{ author }}
|
|
||||||
|
|
||||||
Thank you for your pull request and contributing to our community!
|
|
||||||
|
|
||||||
Please ensure you have:
|
|
||||||
- [ ] Followed our contributing guidelines
|
|
||||||
- [ ] Added/updated tests (if applicable)
|
|
||||||
- [ ] Updated documentation (if applicable)
|
|
||||||
- [ ] Added a descriptive PR title
|
|
||||||
|
|
||||||
Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance.
|
|
||||||
|
|
||||||
# Separate action for merged PRs
|
|
||||||
- name: Handle Merged Pull Requests
|
|
||||||
if: github.event.pull_request.merged == true
|
|
||||||
uses: actions-cool/pr-welcome@v1.4.0
|
|
||||||
with:
|
|
||||||
token: ${{ secrets.GH_TOKEN }}
|
|
||||||
comment: |
|
|
||||||
🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉
|
|
||||||
|
|
||||||
Your pull request has been merged successfully. Thank you for your valuable contribution!
|
|
||||||
|
|
||||||
We appreciate the time and effort you've put into improving our project.
|
|
||||||
Your changes will be included in our next release.
|
|
||||||
|
|
||||||
Keep up the great work! 💪
|
|
||||||
emoji: 'rocket'
|
|
||||||
pr-emoji: '+1, heart, rocket'
|
|
||||||
|
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -4,7 +4,8 @@
|
||||||
.prod.env
|
.prod.env
|
||||||
cognee/.data/
|
cognee/.data/
|
||||||
|
|
||||||
|
*.lance/
|
||||||
|
.DS_Store
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|
|
||||||
BIN
cognee/.DS_Store
vendored
Normal file
BIN
cognee/.DS_Store
vendored
Normal file
Binary file not shown.
|
|
@ -284,16 +284,10 @@ class NetworkXAdapter(GraphDBInterface):
|
||||||
os.makedirs(file_dir, exist_ok = True)
|
os.makedirs(file_dir, exist_ok = True)
|
||||||
|
|
||||||
await self.save_graph_to_file(file_path)
|
await self.save_graph_to_file(file_path)
|
||||||
except Exception as e:
|
|
||||||
logger.error("Failed to load graph from file: %s \n %s", file_path, str(e))
|
|
||||||
# Initialize an empty graph in case of error
|
|
||||||
self.graph = nx.MultiDiGraph()
|
|
||||||
|
|
||||||
file_dir = os.path.dirname(file_path)
|
except Exception:
|
||||||
if not os.path.exists(file_dir):
|
logger.error("Failed to load graph from file: %s", file_path)
|
||||||
os.makedirs(file_dir, exist_ok = True)
|
|
||||||
|
|
||||||
await self.save_graph_to_file(file_path)
|
|
||||||
|
|
||||||
async def delete_graph(self, file_path: str = None):
|
async def delete_graph(self, file_path: str = None):
|
||||||
"""Asynchronously delete the graph file from the filesystem."""
|
"""Asynchronously delete the graph file from the filesystem."""
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,17 @@
|
||||||
from cognee.modules.data.models import Data
|
from cognee.modules.data.models import Data
|
||||||
from cognee.modules.data.processing.document_types import Document, PdfDocument, AudioDocument, ImageDocument, TextDocument
|
from cognee.modules.data.processing.document_types import Document, PdfDocument, AudioDocument, ImageDocument, TextDocument
|
||||||
|
|
||||||
|
EXTENSION_TO_DOCUMENT_CLASS = {
|
||||||
|
"pdf": PdfDocument,
|
||||||
|
"audio": AudioDocument,
|
||||||
|
"image": ImageDocument,
|
||||||
|
"pdf": TextDocument,
|
||||||
|
"txt": TextDocument
|
||||||
|
}
|
||||||
|
|
||||||
def classify_documents(data_documents: list[Data]) -> list[Document]:
|
def classify_documents(data_documents: list[Data]) -> list[Document]:
|
||||||
documents = [
|
documents = [
|
||||||
PdfDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "pdf" else
|
EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
|
||||||
AudioDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "audio" else
|
|
||||||
ImageDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location) if data_item.extension == "image" else
|
|
||||||
TextDocument(id = data_item.id, name=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location)
|
|
||||||
for data_item in data_documents
|
for data_item in data_documents
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_eng
|
||||||
from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
|
from cognee.infrastructure.files.utils.extract_text_from_file import extract_text_from_file
|
||||||
from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
|
from cognee.infrastructure.files.utils.guess_file_type import guess_file_type, FileTypeException
|
||||||
from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph
|
from cognee.modules.data.extraction.knowledge_graph.add_model_class_to_graph import add_model_class_to_graph
|
||||||
from cognee.tasks.infer_data_ontology.models.models import NodeModel, GraphOntology
|
from cognee.tasks.graph.models import NodeModel, GraphOntology
|
||||||
from cognee.shared.data_models import KnowledgeGraph
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
from cognee.modules.engine.utils import generate_node_id, generate_node_name
|
from cognee.modules.engine.utils import generate_node_id, generate_node_name
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from pydantic import BaseModel
|
||||||
from cognee.modules.data.extraction.extract_summary import extract_summary
|
from cognee.modules.data.extraction.extract_summary import extract_summary
|
||||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||||
from cognee.tasks.storage import add_data_points
|
from cognee.tasks.storage import add_data_points
|
||||||
from .models.TextSummary import TextSummary
|
from .models import TextSummary
|
||||||
|
|
||||||
async def summarize_text(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel]):
|
async def summarize_text(data_chunks: list[DocumentChunk], summarization_model: Type[BaseModel]):
|
||||||
if len(data_chunks) == 0:
|
if len(data_chunks) == 0:
|
||||||
|
|
|
||||||
138
notebooks/cognee_code_graph_demo.ipynb
Normal file
138
notebooks/cognee_code_graph_demo.ipynb
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"os.environ['GRAPHISTRY_USERNAME'] = input(\"Please enter your graphistry username\")\n",
|
||||||
|
"os.environ['GRAPHISTRY_PASSWORD'] = input(\"Please enter your graphistry password\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from cognee.modules.users.methods import get_default_user\n",
|
||||||
|
"\n",
|
||||||
|
"from cognee.modules.data.methods import get_datasets\n",
|
||||||
|
"from cognee.modules.data.methods.get_dataset_data import get_dataset_data\n",
|
||||||
|
"from cognee.modules.data.models import Data\n",
|
||||||
|
"\n",
|
||||||
|
"from cognee.modules.pipelines.tasks.Task import Task\n",
|
||||||
|
"from cognee.tasks.documents import classify_documents, check_permissions_on_documents, extract_chunks_from_documents\n",
|
||||||
|
"from cognee.tasks.graph import extract_graph_from_code\n",
|
||||||
|
"from cognee.tasks.storage import add_data_points\n",
|
||||||
|
"from cognee.shared.SourceCodeGraph import SourceCodeGraph\n",
|
||||||
|
"\n",
|
||||||
|
"from cognee.modules.pipelines import run_tasks\n",
|
||||||
|
"\n",
|
||||||
|
"from cognee.shared.utils import render_graph\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"user = await get_default_user()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"existing_datasets = await get_datasets(user.id)\n",
|
||||||
|
"\n",
|
||||||
|
"datasets = {}\n",
|
||||||
|
"for dataset in existing_datasets:\n",
|
||||||
|
" dataset_name = dataset.name.replace(\".\", \"_\").replace(\" \", \"_\")\n",
|
||||||
|
" data_documents: list[Data] = await get_dataset_data(dataset_id = dataset.id)\n",
|
||||||
|
" datasets[dataset_name] = data_documents\n",
|
||||||
|
"print(datasets.keys())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tasks = [\n",
|
||||||
|
" Task(classify_documents),\n",
|
||||||
|
" Task(check_permissions_on_documents, user = user, permissions = [\"write\"]),\n",
|
||||||
|
" Task(extract_chunks_from_documents), # Extract text chunks based on the document type.\n",
|
||||||
|
" Task(add_data_points, task_config = { \"batch_size\": 10 }),\n",
|
||||||
|
" Task(extract_graph_from_code, graph_model = SourceCodeGraph, task_config = { \"batch_size\": 10 }), # Generate knowledge graphs from the document chunks.\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"async def run_codegraph_pipeline(tasks, data_documents):\n",
|
||||||
|
" pipeline = run_tasks(tasks, data_documents, \"code_graph_pipeline\")\n",
|
||||||
|
" results = []\n",
|
||||||
|
" async for result in pipeline:\n",
|
||||||
|
" results.append(result)\n",
|
||||||
|
" return(results)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"results = await run_codegraph_pipeline(tasks, datasets[\"main_dataset\"])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"await render_graph(None, include_nodes = True, include_labels = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "cognee",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
Loading…
Add table
Reference in a new issue