28
.data/code/example.txt
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
'''
|
||||||
|
Given a string, find the length of the longest substring without repeating characters.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
Given "abcabcbb", the answer is "abc", with the length of 3.
|
||||||
|
|
||||||
|
Given "bbbbb", the answer is "b", with the length of 1.
|
||||||
|
|
||||||
|
Given "pwwkew", the answer is "wke", with the length of 3. Note that the answer must be a substring, "pwke" is a subsequence and not a substring.
|
||||||
|
'''
|
||||||
|
|
||||||
|
class Solution(object):
    def lengthOfLongestSubstring(self, s):
        """
        Return the length of the longest substring of ``s`` that contains
        no repeated character.

        :type s: str
        :rtype: int
        """
        # Maps each character to the index just past its latest occurrence,
        # i.e. the earliest position a window containing it again may start.
        resume_at = {}
        window_start = 0
        longest = 0

        for position, char in enumerate(s):
            earliest = resume_at.get(char)
            # Only move the window forward; an occurrence left of the
            # current window is already excluded.
            if earliest is not None and earliest > window_start:
                window_start = earliest

            window_length = position - window_start + 1
            if window_length > longest:
                longest = window_length

            resume_at[char] = position + 1

        return longest
|
||||||
BIN
.data/multimedia/example.png
Normal file
|
After Width: | Height: | Size: 10 KiB |
BIN
.data/multimedia/text_to_speech.mp3
Normal file
BIN
.data/short_stories/soldiers-home.pdf
Normal file
6
.dlt/config.toml
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
# put your configuration values here
|
||||||
|
|
||||||
|
[runtime]
|
||||||
|
log_level = "WARNING" # the system log level of dlt
|
||||||
|
# use the dlthub_telemetry setting to enable/disable anonymous usage data reporting, see https://dlthub.com/docs/telemetry
|
||||||
|
dlthub_telemetry = false
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
tools/bin
|
bin
|
||||||
dist
|
dist
|
||||||
docs
|
docs
|
||||||
evals
|
evals
|
||||||
|
|
|
||||||
2
.gitignore
vendored
|
|
@ -1,4 +1,4 @@
|
||||||
examples/.data
|
.data
|
||||||
.env
|
.env
|
||||||
.local.env
|
.local.env
|
||||||
.prod.env
|
.prod.env
|
||||||
|
|
|
||||||
|
|
@ -21,12 +21,12 @@ WORKDIR /app
|
||||||
|
|
||||||
ENV PYTHONPATH=/app
|
ENV PYTHONPATH=/app
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY ../pyproject.toml poetry.lock /app/
|
COPY pyproject.toml poetry.lock /app/
|
||||||
|
|
||||||
|
|
||||||
RUN pip install poetry
|
RUN pip install poetry
|
||||||
|
|
||||||
RUN poetry install --all-extras --no-root --without dev
|
RUN poetry install --all-extras --no-root --without dev
|
||||||
|
|
||||||
COPY ../cognee /app/cognee
|
COPY cognee/ /app/cognee
|
||||||
COPY ../README.md /app/README.md
|
COPY README.md /app/README.md
|
||||||
|
|
@ -35,9 +35,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
|
||||||
<p align="center">
|
<p align="center">
|
||||||
🌐 Available Languages
|
🌐 Available Languages
|
||||||
:
|
:
|
||||||
<a href="assets/community/README.pt.md">🇵🇹 Português</a>
|
<a href="community/README.pt.md">🇵🇹 Português</a>
|
||||||
·
|
·
|
||||||
<a href="assets/community/README.zh.md">🇨🇳 [中文]</a>
|
<a href="community/README.zh.md">🇨🇳 [中文]</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<div style="text-align: center">
|
<div style="text-align: center">
|
||||||
|
|
|
||||||
117
alembic.ini
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
# A generic, single database configuration.
|
||||||
|
|
||||||
|
[alembic]
|
||||||
|
# path to migration scripts
|
||||||
|
# Use forward slashes (/) also on windows to provide an os agnostic path
|
||||||
|
script_location = alembic
|
||||||
|
|
||||||
|
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
||||||
|
# Uncomment the line below if you want the files to be prepended with date and time
|
||||||
|
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
||||||
|
# for all available tokens
|
||||||
|
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
||||||
|
|
||||||
|
# sys.path path, will be prepended to sys.path if present.
|
||||||
|
# defaults to the current working directory.
|
||||||
|
prepend_sys_path = .
|
||||||
|
|
||||||
|
# timezone to use when rendering the date within the migration file
|
||||||
|
# as well as the filename.
|
||||||
|
# If specified, requires the python>=3.9 or backports.zoneinfo library.
|
||||||
|
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
|
||||||
|
# string value is passed to ZoneInfo()
|
||||||
|
# leave blank for localtime
|
||||||
|
# timezone =
|
||||||
|
|
||||||
|
# max length of characters to apply to the "slug" field
|
||||||
|
# truncate_slug_length = 40
|
||||||
|
|
||||||
|
# set to 'true' to run the environment during
|
||||||
|
# the 'revision' command, regardless of autogenerate
|
||||||
|
# revision_environment = false
|
||||||
|
|
||||||
|
# set to 'true' to allow .pyc and .pyo files without
|
||||||
|
# a source .py file to be detected as revisions in the
|
||||||
|
# versions/ directory
|
||||||
|
# sourceless = false
|
||||||
|
|
||||||
|
# version location specification; This defaults
|
||||||
|
# to alembic/versions. When using multiple version
|
||||||
|
# directories, initial revisions must be specified with --version-path.
|
||||||
|
# The path separator used here should be the separator specified by "version_path_separator" below.
|
||||||
|
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
|
||||||
|
|
||||||
|
# version path separator; As mentioned above, this is the character used to split
|
||||||
|
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
|
||||||
|
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
|
||||||
|
# Valid values for version_path_separator are:
|
||||||
|
#
|
||||||
|
# version_path_separator = :
|
||||||
|
# version_path_separator = ;
|
||||||
|
# version_path_separator = space
|
||||||
|
# version_path_separator = newline
|
||||||
|
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
||||||
|
|
||||||
|
# set to 'true' to search source files recursively
|
||||||
|
# in each "version_locations" directory
|
||||||
|
# new in Alembic version 1.10
|
||||||
|
# recursive_version_locations = false
|
||||||
|
|
||||||
|
# the output encoding used when revision files
|
||||||
|
# are written from script.py.mako
|
||||||
|
# output_encoding = utf-8
|
||||||
|
|
||||||
|
sqlalchemy.url = %(SQLALCHEMY_DATABASE_URI)s
|
||||||
|
|
||||||
|
|
||||||
|
[post_write_hooks]
|
||||||
|
# post_write_hooks defines scripts or Python functions that are run
|
||||||
|
# on newly generated revision scripts. See the documentation for further
|
||||||
|
# detail and examples
|
||||||
|
|
||||||
|
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
||||||
|
# hooks = black
|
||||||
|
# black.type = console_scripts
|
||||||
|
# black.entrypoint = black
|
||||||
|
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
||||||
|
|
||||||
|
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
||||||
|
# hooks = ruff
|
||||||
|
# ruff.type = exec
|
||||||
|
# ruff.executable = %(here)s/.venv/bin/ruff
|
||||||
|
# ruff.options = --fix REVISION_SCRIPT_FILENAME
|
||||||
|
|
||||||
|
# Logging configuration
|
||||||
|
[loggers]
|
||||||
|
keys = root,sqlalchemy,alembic
|
||||||
|
|
||||||
|
[handlers]
|
||||||
|
keys = console
|
||||||
|
|
||||||
|
[formatters]
|
||||||
|
keys = generic
|
||||||
|
|
||||||
|
[logger_root]
|
||||||
|
level = WARN
|
||||||
|
handlers = console
|
||||||
|
qualname =
|
||||||
|
|
||||||
|
[logger_sqlalchemy]
|
||||||
|
level = WARN
|
||||||
|
handlers =
|
||||||
|
qualname = sqlalchemy.engine
|
||||||
|
|
||||||
|
[logger_alembic]
|
||||||
|
level = INFO
|
||||||
|
handlers =
|
||||||
|
qualname = alembic
|
||||||
|
|
||||||
|
[handler_console]
|
||||||
|
class = StreamHandler
|
||||||
|
args = (sys.stderr,)
|
||||||
|
level = NOTSET
|
||||||
|
formatter = generic
|
||||||
|
|
||||||
|
[formatter_generic]
|
||||||
|
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||||
|
datefmt = %H:%M:%S
|
||||||
|
Before Width: | Height: | Size: 2.3 MiB After Width: | Height: | Size: 353 KiB |
153
cognee-gui.py
Normal file
|
|
@ -0,0 +1,153 @@
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
try:
|
||||||
|
import cognee
|
||||||
|
from PySide6.QtWidgets import (
|
||||||
|
QApplication,
|
||||||
|
QWidget,
|
||||||
|
QPushButton,
|
||||||
|
QLineEdit,
|
||||||
|
QFileDialog,
|
||||||
|
QVBoxLayout,
|
||||||
|
QHBoxLayout,
|
||||||
|
QLabel,
|
||||||
|
QMessageBox,
|
||||||
|
QTextEdit,
|
||||||
|
QProgressDialog,
|
||||||
|
)
|
||||||
|
from PySide6.QtCore import Qt
|
||||||
|
|
||||||
|
from qasync import QEventLoop # Import QEventLoop from qasync
|
||||||
|
except ImportError as e:
|
||||||
|
print(
|
||||||
|
"\nPlease install Cognee with optional gui dependencies or manually install missing dependencies.\n"
|
||||||
|
)
|
||||||
|
print("\nTo install with poetry use:")
|
||||||
|
print("\npoetry install -E gui\n")
|
||||||
|
print("\nOr to install with poetry and all dependencies use:")
|
||||||
|
print("\npoetry install --all-extras\n")
|
||||||
|
print("\nTo install with pip use: ")
|
||||||
|
print('\npip install ".[gui]"\n')
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
class FileSearchApp(QWidget):
    """Single-window GUI to upload a file to Cognee, search it and visualize the graph.

    The button handlers start the coroutines of this class with
    ``asyncio.ensure_future``, so an asyncio-compatible event loop must be set
    as the current loop before any button is clicked.
    """

    def __init__(self):
        super().__init__()
        # Path of the most recently chosen file; None until the user picks one.
        self.selected_file = None
        self.init_ui()

    def init_ui(self):
        """Create all widgets, wire their signals and lay them out."""
        # Horizontal layout for file upload and visualization buttons
        button_layout = QHBoxLayout()

        # Button to open file dialog
        self.file_button = QPushButton("Upload File to Cognee", parent=self)
        self.file_button.clicked.connect(self.open_file_dialog)
        button_layout.addWidget(self.file_button)

        # Button to visualize data
        self.visualize_button = QPushButton("Visualize Data", parent=self)
        self.visualize_button.clicked.connect(lambda: asyncio.ensure_future(self.visualize_data()))
        button_layout.addWidget(self.visualize_button)

        # Label to display selected file path
        self.file_label = QLabel("No file selected", parent=self)

        # Line edit for search input
        self.search_input = QLineEdit(parent=self)
        self.search_input.setPlaceholderText("Enter text to search...")

        # Button to perform search; schedule the async search on click
        self.search_button = QPushButton("Cognee Search", parent=self)
        self.search_button.clicked.connect(lambda: asyncio.ensure_future(self._cognee_search()))

        # Text output area for search results
        self.result_output = QTextEdit(parent=self)
        self.result_output.setReadOnly(True)
        self.result_output.setPlaceholderText("Search results will appear here...")

        # Progress dialog (range 0..0, no cancel button), kept closed until needed
        self.progress_dialog = QProgressDialog("Processing..", None, 0, 0, parent=self)
        self.progress_dialog.setWindowModality(Qt.WindowModal)
        self.progress_dialog.setCancelButton(None)  # Remove the cancel button
        self.progress_dialog.close()

        # Layout setup
        layout = QVBoxLayout()
        layout.addLayout(button_layout)
        layout.addWidget(self.file_label)
        layout.addWidget(self.search_input)
        layout.addWidget(self.search_button)
        layout.addWidget(self.result_output)

        self.setLayout(layout)
        self.setWindowTitle("Cognee")
        self.resize(500, 300)

    def open_file_dialog(self):
        """Let the user pick a file and, if one was chosen, start processing it."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "Select a File", "", "All Files (*.*);;Text Files (*.txt)"
        )
        if file_path:
            self.selected_file = file_path
            self.file_label.setText(f"Selected: {file_path}")
            asyncio.ensure_future(self.process_file_async())

    async def process_file_async(self):
        """Asynchronously add and process the selected file."""
        # Disable the entire window while Cognee works
        self.progress_dialog.show()
        self.setEnabled(False)
        try:
            await cognee.add(self.selected_file)
            await cognee.cognify()
        except Exception as e:
            QMessageBox.critical(self, "Error", f"File processing failed: {str(e)}")
        finally:
            # Runs on success, failure and task cancellation alike, so the
            # window is re-enabled in every case.
            self.setEnabled(True)
            self.progress_dialog.close()

    async def _cognee_search(self):
        """Performs an async search and updates the result output."""
        # Disable the entire window while the search runs
        self.setEnabled(False)
        self.progress_dialog.show()

        try:
            search_text = self.search_input.text().strip()
            result = await cognee.search(query_text=search_text)
            print(result)
            if not result:
                # An empty result is not an error; say so instead of failing
                # on result[0] with "list index out of range".
                self.result_output.setText("No results found.")
            elif isinstance(result, str):
                self.result_output.setText(result)
            else:
                # Assumes result is an iterable of results (TODO confirm
                # against cognee.search); items are stringified because
                # setText() only accepts str.
                self.result_output.setText("\n\n".join(str(item) for item in result))
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Search failed: {str(e)}")
        finally:
            # Re-enable the window in every case, including task cancellation.
            self.setEnabled(True)
            self.progress_dialog.close()

    async def visualize_data(self):
        """Async slot for handling visualize data button press.

        Renders the graph to ``.data/graph_visualization.html`` next to this
        script and opens it in the default web browser. Failures are reported
        in a message box, like the other handlers, because an exception raised
        inside a task started with ``ensure_future`` is otherwise never shown
        to the user.
        """
        import webbrowser
        from cognee.api.v1.visualize.visualize import visualize_graph
        import os
        import pathlib

        try:
            # as_uri() needs an absolute path; __file__ is not guaranteed to be one.
            script_dir = pathlib.Path(os.path.abspath(__file__)).parent
            html_path = script_dir / ".data" / "graph_visualization.html"
            # Make sure the target directory exists before anything writes to it.
            html_path.parent.mkdir(parents=True, exist_ok=True)

            await visualize_graph(str(html_path))

            # as_uri() yields a well-formed file URI on every platform
            # (drive letters, percent-encoded spaces), unlike f"file://{path}".
            webbrowser.open(html_path.as_uri())
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Visualization failed: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: build the Qt application, install a qasync event loop as
# the current asyncio loop, show the main window and run until the loop stops.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    # Create a qasync event loop and set it as the current event loop.
    # The widget's button handlers call asyncio.ensure_future(...), which
    # schedules their coroutines on whatever loop is current.
    loop = QEventLoop(app)
    asyncio.set_event_loop(loop)

    window = FileSearchApp()
    window.show()

    # NOTE(review): the context manager presumably closes the loop on exit -
    # confirm against the qasync documentation.
    with loop:
        loop.run_forever()
|
||||||
|
Before Width: | Height: | Size: 262 KiB After Width: | Height: | Size: 262 KiB |
|
Before Width: | Height: | Size: 181 KiB After Width: | Height: | Size: 181 KiB |
|
|
@ -4,8 +4,8 @@ services:
|
||||||
networks:
|
networks:
|
||||||
- cognee-network
|
- cognee-network
|
||||||
build:
|
build:
|
||||||
context: ..
|
context: .
|
||||||
dockerfile: ../Dockerfile
|
dockerfile: Dockerfile
|
||||||
volumes:
|
volumes:
|
||||||
- ./cognee:/app/cognee
|
- ./cognee:/app/cognee
|
||||||
- .env:/app/.env
|
- .env:/app/.env
|
||||||
|
|
@ -33,8 +33,8 @@ services:
|
||||||
profiles:
|
profiles:
|
||||||
- ui
|
- ui
|
||||||
build:
|
build:
|
||||||
context: ../cognee-frontend
|
context: ./cognee-frontend
|
||||||
dockerfile: ../cognee-frontend/Dockerfile
|
dockerfile: Dockerfile
|
||||||
volumes:
|
volumes:
|
||||||
- ./cognee-frontend/src:/app/src
|
- ./cognee-frontend/src:/app/src
|
||||||
- ./cognee-frontend/public:/app/public
|
- ./cognee-frontend/public:/app/public
|
||||||
4
licenses/README.md
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Third party licenses
|
||||||
|
|
||||||
|
This folder contains the licenses of third-party open-source software that has been redistributed in this project.
|
||||||
|
Details of included files and modifications can be found in [NOTICE](/NOTICE.md).
|
||||||
|
|
@ -12,8 +12,8 @@ app = modal.App("cognee-runner")
|
||||||
|
|
||||||
image = (
|
image = (
|
||||||
modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
|
modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
|
||||||
.copy_local_file("../pyproject.toml", "pyproject.toml")
|
.copy_local_file("pyproject.toml", "pyproject.toml")
|
||||||
.copy_local_file("../poetry.lock", "poetry.lock")
|
.copy_local_file("poetry.lock", "poetry.lock")
|
||||||
.env({"ENV": os.getenv("ENV"), "LLM_API_KEY": os.getenv("LLM_API_KEY")})
|
.env({"ENV": os.getenv("ENV"), "LLM_API_KEY": os.getenv("LLM_API_KEY")})
|
||||||
.poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
|
.poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
|
||||||
.pip_install("protobuf", "h2")
|
.pip_install("protobuf", "h2")
|
||||||
62
profiling/graph_pydantic_conversion/benchmark_function.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
import statistics
|
||||||
|
import time
|
||||||
|
import tracemalloc
|
||||||
|
from typing import Any, Callable, Dict
|
||||||
|
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
|
||||||
|
def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
    """
    Benchmark a function for memory usage and computational performance.

    ``func(*args)`` is executed ``num_runs`` times. Each run records wall-clock
    time, the process CPU time (user + system) consumed, and the peak memory
    traced by ``tracemalloc``.

    Args:
        func: Function to benchmark
        *args: Arguments to pass to the function
        num_runs: Number of times to run the benchmark

    Returns:
        Dictionary containing benchmark metrics: ``mean_execution_time``
        (seconds), ``mean_peak_memory_mb``, ``mean_cpu_percent``, ``num_runs``
        and, when ``num_runs > 1``, ``std_execution_time``.

    Raises:
        statistics.StatisticsError: If ``num_runs`` is less than 1, because
            there are no samples to average.
    """
    execution_times = []
    peak_memory_usages = []
    cpu_percentages = []

    process = psutil.Process()

    for _ in range(num_runs):
        # Start memory tracking
        tracemalloc.start()
        try:
            # Measure execution time and CPU usage
            start_time = time.perf_counter()
            start_cpu_time = process.cpu_times()

            # The call under measurement; without it the timers bracket nothing.
            func(*args)

            end_cpu_time = process.cpu_times()
            end_time = time.perf_counter()

            _, peak = tracemalloc.get_traced_memory()
        finally:
            # Always stop tracing, even if func raises, so a failed run does
            # not leave tracemalloc enabled for the rest of the process.
            tracemalloc.stop()

        # Calculate metrics
        execution_time = end_time - start_time
        cpu_time = (end_cpu_time.user + end_cpu_time.system) - (
            start_cpu_time.user + start_cpu_time.system
        )

        # Store results
        execution_times.append(execution_time)
        peak_memory_usages.append(peak / 1024 / 1024)  # Convert to MB
        # Guard against a zero elapsed time on coarse clocks.
        cpu_percentages.append((cpu_time / execution_time) * 100 if execution_time > 0 else 0.0)

    analysis = {
        "mean_execution_time": statistics.mean(execution_times),
        "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
        "mean_cpu_percent": statistics.mean(cpu_percentages),
        "num_runs": num_runs,
    }

    # A standard deviation needs at least two samples.
    if num_runs > 1:
        analysis["std_execution_time"] = statistics.stdev(execution_times)

    return analysis
|
||||||
|
|
@ -0,0 +1,63 @@
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from .benchmark_function import benchmark_function
|
||||||
|
|
||||||
|
from cognee.modules.graph.utils import get_graph_from_model
|
||||||
|
from cognee.tests.unit.interfaces.graph.util import (
|
||||||
|
PERSON_NAMES,
|
||||||
|
create_organization_recursive,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Example usage:
|
||||||
|
# Example usage:
if __name__ == "__main__":

    def get_graph_from_model_sync(model):
        """Run the async ``get_graph_from_model`` to completion, starting from empty bookkeeping dicts."""
        return asyncio.run(
            get_graph_from_model(
                model,
                added_nodes={},
                added_edges={},
                visited_properties={},
            )
        )

    cli = argparse.ArgumentParser(
        description="Benchmark graph model with configurable recursive depth"
    )
    cli.add_argument(
        "--recursive-depth",
        type=int,
        default=3,
        help="Recursive depth for graph generation (default: 3)",
    )
    cli.add_argument("--runs", type=int, default=5, help="Number of benchmark runs (default: 5)")
    args = cli.parse_args()

    society = create_organization_recursive(
        "society", "Society", PERSON_NAMES, args.recursive_depth
    )

    # One un-timed conversion, used only to report the graph size below.
    nodes, edges = get_graph_from_model_sync(society)

    results = benchmark_function(get_graph_from_model_sync, society, num_runs=args.runs)

    print("\nBenchmark Results:")
    print(f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}")
    print(f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB")
    print(f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%")
    print(f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds")

    if "std_execution_time" in results:
        print(f"Execution Time Std: {results['std_execution_time']:.4f} seconds")
|
||||||
10
profiling/util/DummyEmbeddingEngine.py
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
import numpy as np
|
||||||
|
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
|
||||||
|
|
||||||
|
|
||||||
|
class DummyEmbeddingEngine(EmbeddingEngine):
    """Embedding engine stand-in that returns random vectors instead of calling a model."""

    # Dimensionality of every vector produced by this engine.
    VECTOR_SIZE = 3072

    async def embed_text(self, text: list[str]) -> list[list[float]]:
        """Return one random vector of ``VECTOR_SIZE`` floats for each input text.

        The values are drawn from a standard normal distribution and are
        unrelated to the content of ``text``; only the number of inputs matters.
        """
        # One vector per input, as plain Python floats, to match the annotated
        # list[list[float]] return type.
        return [np.random.randn(self.VECTOR_SIZE).tolist() for _ in text]

    def get_vector_size(self) -> int:
        """Return the length of the vectors produced by ``embed_text``."""
        return self.VECTOR_SIZE
|
||||||
59
profiling/util/DummyLLMAdapter.py
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
from typing import Type
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
import spacy
|
||||||
|
import textacy
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from cognee.infrastructure.llm.llm_interface import LLMInterface
|
||||||
|
from cognee.shared.data_models import Edge, KnowledgeGraph, Node, SummarizedContent
|
||||||
|
|
||||||
|
|
||||||
|
class DummyLLMAdapter(LLMInterface):
    """LLM stand-in that builds structured outputs locally with spaCy/textacy instead of calling a model."""

    # spaCy pipeline, loaded once when the class is defined and shared by all instances.
    nlp = spacy.load("en_core_web_sm")

    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
    ) -> BaseModel:
        """Build a ``response_model`` instance from ``text_input`` without an LLM.

        Args:
            text_input: Text to summarize or extract a graph from.
            system_prompt: Accepted for interface compatibility; not used.
            response_model: Either ``SummarizedContent`` or ``KnowledgeGraph``.

        Returns:
            A ``SummarizedContent`` or ``KnowledgeGraph`` built from ``text_input``.

        Raises:
            NotImplementedError: If ``response_model`` is any other class.
        """
        # Compare the classes themselves rather than their string repr, which
        # silently breaks if the module is moved or renamed.
        if response_model is SummarizedContent:
            return dummy_summarize_content(text_input)
        if response_model is KnowledgeGraph:
            return dummy_extract_knowledge_graph(text_input, self.nlp)

        raise NotImplementedError(
            "Currently dummy acreate_structured_output is only implemented for SummarizedContent and KnowledgeGraph"
        )
|
||||||
|
|
||||||
|
|
||||||
|
def dummy_extract_knowledge_graph(text, nlp):
    """Build a ``KnowledgeGraph`` from the subject-verb-object triples textacy finds in ``text``.

    Each distinct subject or object becomes one ``Node`` (its tokens joined
    with ``_``), and each triple becomes one ``Edge`` named after the joined
    verb tokens.

    Args:
        text: Text to analyze.
        nlp: Callable that turns ``text`` into the document passed to
            ``textacy.extract.subject_verb_object_triples``.

    Returns:
        ``KnowledgeGraph`` holding the nodes in first-seen order and one edge per triple.
    """
    doc = nlp(text)

    nodes = {}
    edges = []

    def node_for(tokens):
        # Create the Node (and its UUID) only the first time a name is seen,
        # instead of building a throwaway one on every lookup.
        name = "_".join(str(token) for token in tokens)
        if name not in nodes:
            nodes[name] = Node(id=str(uuid4()), name=name, type="object", description="")
        return nodes[name]

    for triple in textacy.extract.subject_verb_object_triples(doc):
        source_node = node_for(triple.subject)
        target_node = node_for(triple.object)
        edges.append(
            Edge(
                source_node_id=source_node.id,
                target_node_id=target_node.id,
                relationship_name="_".join(str(token) for token in triple.verb),
            )
        )

    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)
|
||||||
|
|
||||||
|
|
||||||
|
def dummy_summarize_content(text):
    """Build a ``SummarizedContent`` from the longest distinct words of ``text``.

    ``text`` is split on single spaces. The summary is the 50 longest distinct
    words and the description the 10 longest, each joined by a space.

    Args:
        text: Text to summarize.

    Returns:
        ``SummarizedContent`` with ``summary`` and ``description`` filled in.
    """
    # Longest first; ties are broken alphabetically so the result does not
    # depend on set iteration order, which changes between interpreter runs.
    ranked_words = sorted(set(text.split(" ")), key=lambda word: (-len(word), word))
    summary = " ".join(ranked_words[:50])
    description = " ".join(ranked_words[:10])
    return SummarizedContent(summary=summary, description=description)
|
||||||