Clean up core cognee repo
|
|
@ -1,28 +0,0 @@
|
||||||
'''
|
|
||||||
Given a string, find the length of the longest substring without repeating characters.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
|
|
||||||
Given "abcabcbb", the answer is "abc", which has a length of 3.
|
|
||||||
|
|
||||||
Given "bbbbb", the answer is "b", with the length of 1.
|
|
||||||
|
|
||||||
Given "pwwkew", the answer is "wke", with the length of 3. Note that the answer must be a substring; "pwke" is a subsequence, not a substring.
|
|
||||||
'''
|
|
||||||
|
|
||||||
class Solution(object):
    """Sliding-window solver for the longest substring without repeating characters."""

    def lengthOfLongestSubstring(self, s):
        """
        Return the length of the longest substring of ``s`` that has no repeated character.

        :type s: str
        :rtype: int
        """
        # Maps each character to the index just past its most recent occurrence,
        # i.e. the earliest index at which a window containing it again may start.
        next_start = {}
        start, result = 0, 0

        for end, char in enumerate(s):
            if char in next_start:
                # max() ignores stale occurrences that lie before the current
                # window start, so the window never moves backwards.
                start = max(next_start[char], start)
            result = max(result, end - start + 1)
            next_start[char] = end + 1

        return result
|
|
||||||
|
Before Width: | Height: | Size: 10 KiB |
|
|
@ -1,6 +0,0 @@
|
||||||
# put your configuration values here
|
|
||||||
|
|
||||||
[runtime]
|
|
||||||
log_level = "WARNING" # the system log level of dlt
|
|
||||||
# use the dlthub_telemetry setting to enable/disable anonymous usage data reporting, see https://dlthub.com/docs/telemetry
|
|
||||||
dlthub_telemetry = false
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
bin
|
tools/bin
|
||||||
dist
|
dist
|
||||||
docs
|
docs
|
||||||
evals
|
evals
|
||||||
|
|
|
||||||
2
.gitignore
vendored
|
|
@ -1,4 +1,4 @@
|
||||||
.data
|
examples/.data
|
||||||
.env
|
.env
|
||||||
.local.env
|
.local.env
|
||||||
.prod.env
|
.prod.env
|
||||||
|
|
|
||||||
|
|
@ -35,9 +35,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
|
||||||
<p align="center">
|
<p align="center">
|
||||||
🌐 Available Languages
|
🌐 Available Languages
|
||||||
:
|
:
|
||||||
<a href="community/README.pt.md">🇵🇹 Português</a>
|
<a href="assets/community/README.pt.md">🇵🇹 Português</a>
|
||||||
·
|
·
|
||||||
<a href="community/README.zh.md">🇨🇳 [中文]</a>
|
<a href="assets/community/README.zh.md">🇨🇳 [中文]</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<div style="text-align: center">
|
<div style="text-align: center">
|
||||||
|
|
|
||||||
117
alembic.ini
|
|
@ -1,117 +0,0 @@
|
||||||
# A generic, single database configuration.
|
|
||||||
|
|
||||||
[alembic]
|
|
||||||
# path to migration scripts
|
|
||||||
# Use forward slashes (/) also on windows to provide an os agnostic path
|
|
||||||
script_location = alembic
|
|
||||||
|
|
||||||
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
|
||||||
# Uncomment the line below if you want the files to be prepended with date and time
|
|
||||||
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
|
||||||
# for all available tokens
|
|
||||||
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
|
||||||
|
|
||||||
# sys.path path, will be prepended to sys.path if present.
|
|
||||||
# defaults to the current working directory.
|
|
||||||
prepend_sys_path = .
|
|
||||||
|
|
||||||
# timezone to use when rendering the date within the migration file
|
|
||||||
# as well as the filename.
|
|
||||||
# If specified, requires the python>=3.9 or backports.zoneinfo library.
|
|
||||||
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
|
|
||||||
# string value is passed to ZoneInfo()
|
|
||||||
# leave blank for localtime
|
|
||||||
# timezone =
|
|
||||||
|
|
||||||
# max length of characters to apply to the "slug" field
|
|
||||||
# truncate_slug_length = 40
|
|
||||||
|
|
||||||
# set to 'true' to run the environment during
|
|
||||||
# the 'revision' command, regardless of autogenerate
|
|
||||||
# revision_environment = false
|
|
||||||
|
|
||||||
# set to 'true' to allow .pyc and .pyo files without
|
|
||||||
# a source .py file to be detected as revisions in the
|
|
||||||
# versions/ directory
|
|
||||||
# sourceless = false
|
|
||||||
|
|
||||||
# version location specification; This defaults
|
|
||||||
# to alembic/versions. When using multiple version
|
|
||||||
# directories, initial revisions must be specified with --version-path.
|
|
||||||
# The path separator used here should be the separator specified by "version_path_separator" below.
|
|
||||||
# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
|
|
||||||
|
|
||||||
# version path separator; As mentioned above, this is the character used to split
|
|
||||||
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
|
|
||||||
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
|
|
||||||
# Valid values for version_path_separator are:
|
|
||||||
#
|
|
||||||
# version_path_separator = :
|
|
||||||
# version_path_separator = ;
|
|
||||||
# version_path_separator = space
|
|
||||||
# version_path_separator = newline
|
|
||||||
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
|
||||||
|
|
||||||
# set to 'true' to search source files recursively
|
|
||||||
# in each "version_locations" directory
|
|
||||||
# new in Alembic version 1.10
|
|
||||||
# recursive_version_locations = false
|
|
||||||
|
|
||||||
# the output encoding used when revision files
|
|
||||||
# are written from script.py.mako
|
|
||||||
# output_encoding = utf-8
|
|
||||||
|
|
||||||
sqlalchemy.url = %(SQLALCHEMY_DATABASE_URI)s
|
|
||||||
|
|
||||||
|
|
||||||
[post_write_hooks]
|
|
||||||
# post_write_hooks defines scripts or Python functions that are run
|
|
||||||
# on newly generated revision scripts. See the documentation for further
|
|
||||||
# detail and examples
|
|
||||||
|
|
||||||
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
|
||||||
# hooks = black
|
|
||||||
# black.type = console_scripts
|
|
||||||
# black.entrypoint = black
|
|
||||||
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
|
||||||
|
|
||||||
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
|
||||||
# hooks = ruff
|
|
||||||
# ruff.type = exec
|
|
||||||
# ruff.executable = %(here)s/.venv/bin/ruff
|
|
||||||
# ruff.options = --fix REVISION_SCRIPT_FILENAME
|
|
||||||
|
|
||||||
# Logging configuration
|
|
||||||
[loggers]
|
|
||||||
keys = root,sqlalchemy,alembic
|
|
||||||
|
|
||||||
[handlers]
|
|
||||||
keys = console
|
|
||||||
|
|
||||||
[formatters]
|
|
||||||
keys = generic
|
|
||||||
|
|
||||||
[logger_root]
|
|
||||||
level = WARN
|
|
||||||
handlers = console
|
|
||||||
qualname =
|
|
||||||
|
|
||||||
[logger_sqlalchemy]
|
|
||||||
level = WARN
|
|
||||||
handlers =
|
|
||||||
qualname = sqlalchemy.engine
|
|
||||||
|
|
||||||
[logger_alembic]
|
|
||||||
level = INFO
|
|
||||||
handlers =
|
|
||||||
qualname = alembic
|
|
||||||
|
|
||||||
[handler_console]
|
|
||||||
class = StreamHandler
|
|
||||||
args = (sys.stderr,)
|
|
||||||
level = NOTSET
|
|
||||||
formatter = generic
|
|
||||||
|
|
||||||
[formatter_generic]
|
|
||||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
|
||||||
datefmt = %H:%M:%S
|
|
||||||
|
Before Width: | Height: | Size: 353 KiB After Width: | Height: | Size: 2.3 MiB |
|
Before Width: | Height: | Size: 262 KiB After Width: | Height: | Size: 262 KiB |
|
Before Width: | Height: | Size: 181 KiB After Width: | Height: | Size: 181 KiB |
153
cognee-gui.py
|
|
@ -1,153 +0,0 @@
|
||||||
import sys
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
try:
|
|
||||||
import cognee
|
|
||||||
from PySide6.QtWidgets import (
|
|
||||||
QApplication,
|
|
||||||
QWidget,
|
|
||||||
QPushButton,
|
|
||||||
QLineEdit,
|
|
||||||
QFileDialog,
|
|
||||||
QVBoxLayout,
|
|
||||||
QHBoxLayout,
|
|
||||||
QLabel,
|
|
||||||
QMessageBox,
|
|
||||||
QTextEdit,
|
|
||||||
QProgressDialog,
|
|
||||||
)
|
|
||||||
from PySide6.QtCore import Qt
|
|
||||||
|
|
||||||
from qasync import QEventLoop # Import QEventLoop from qasync
|
|
||||||
except ImportError as e:
|
|
||||||
print(
|
|
||||||
"\nPlease install Cognee with optional gui dependencies or manually install missing dependencies.\n"
|
|
||||||
)
|
|
||||||
print("\nTo install with poetry use:")
|
|
||||||
print("\npoetry install -E gui\n")
|
|
||||||
print("\nOr to install with poetry and all dependencies use:")
|
|
||||||
print("\npoetry install --all-extras\n")
|
|
||||||
print("\nTo install with pip use: ")
|
|
||||||
print('\npip install ".[gui]"\n')
|
|
||||||
raise e
|
|
||||||
|
|
||||||
|
|
||||||
class FileSearchApp(QWidget):
    """Qt window for uploading a file to Cognee, searching it and visualizing the graph."""

    def __init__(self):
        super().__init__()
        # Path of the file most recently picked in the file dialog (None until then).
        self.selected_file = None
        self.init_ui()

    def init_ui(self):
        """Create the widgets, connect their signals and assemble the layout."""
        # Horizontal layout for file upload and visualization buttons
        button_layout = QHBoxLayout()

        # Button to open file dialog
        self.file_button = QPushButton("Upload File to Cognee", parent=self)
        self.file_button.clicked.connect(self.open_file_dialog)
        button_layout.addWidget(self.file_button)

        # Button to visualize data; the coroutine is scheduled on the running event loop
        self.visualize_button = QPushButton("Visualize Data", parent=self)
        self.visualize_button.clicked.connect(lambda: asyncio.ensure_future(self.visualize_data()))
        button_layout.addWidget(self.visualize_button)

        # Label to display selected file path
        self.file_label = QLabel("No file selected", parent=self)

        # Line edit for search input
        self.search_input = QLineEdit(parent=self)
        self.search_input.setPlaceholderText("Enter text to search...")

        # Button to perform search; schedule the async search on click
        self.search_button = QPushButton("Cognee Search", parent=self)
        self.search_button.clicked.connect(lambda: asyncio.ensure_future(self._cognee_search()))

        # Text output area for search results
        self.result_output = QTextEdit(parent=self)
        self.result_output.setReadOnly(True)
        self.result_output.setPlaceholderText("Search results will appear here...")

        # Progress dialog (range 0..0, no cancel button), kept closed until work starts
        self.progress_dialog = QProgressDialog("Processing..", None, 0, 0, parent=self)
        self.progress_dialog.setWindowModality(Qt.WindowModal)
        self.progress_dialog.setCancelButton(None)  # Remove the cancel button
        self.progress_dialog.close()

        # Layout setup
        layout = QVBoxLayout()
        layout.addLayout(button_layout)
        layout.addWidget(self.file_label)
        layout.addWidget(self.search_input)
        layout.addWidget(self.search_button)
        layout.addWidget(self.result_output)

        self.setLayout(layout)
        self.setWindowTitle("Cognee")
        self.resize(500, 300)

    def open_file_dialog(self):
        """Let the user pick a file and, if one was chosen, schedule its processing."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "Select a File", "", "All Files (*.*);;Text Files (*.txt)"
        )
        if file_path:
            self.selected_file = file_path
            self.file_label.setText(f"Selected: {file_path}")
            asyncio.ensure_future(self.process_file_async())

    async def process_file_async(self):
        """Asynchronously add and process the selected file."""
        # Disable the entire window while work is in progress
        self.progress_dialog.show()
        self.setEnabled(False)
        try:
            await cognee.add(self.selected_file)
            await cognee.cognify()
        except Exception as e:
            QMessageBox.critical(self, "Error", f"File processing failed: {str(e)}")
        finally:
            # Always re-enable the window, even if the task is cancelled
            self.setEnabled(True)
            self.progress_dialog.close()

    async def _cognee_search(self):
        """Performs an async search and updates the result output."""
        # Disable the entire window while work is in progress
        self.setEnabled(False)
        self.progress_dialog.show()

        try:
            search_text = self.search_input.text().strip()
            result = await cognee.search(query_text=search_text)
            print(result)
            if result:
                # Only the first entry is shown; str() keeps setText from
                # failing when that entry is not already a string.
                self.result_output.setText(str(result[0]))
            else:
                self.result_output.setText("No results found.")
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Search failed: {str(e)}")
        finally:
            # Always re-enable the window, even if the task is cancelled
            self.setEnabled(True)
            self.progress_dialog.close()

    async def visualize_data(self):
        """Async slot for handling visualize data button press."""
        import pathlib
        import webbrowser

        from cognee.api.v1.visualize.visualize import visualize_graph

        # as_uri() requires an absolute path and yields a correctly escaped file:// URL.
        html_path = pathlib.Path(__file__).absolute().parent / ".data" / "graph_visualization.html"
        try:
            await visualize_graph(str(html_path))
            webbrowser.open(html_path.as_uri())
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Visualization failed: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    qt_app = QApplication(sys.argv)

    # Install a qasync event loop as the current asyncio loop so coroutines
    # scheduled with asyncio.ensure_future run alongside the Qt application.
    event_loop = QEventLoop(qt_app)
    asyncio.set_event_loop(event_loop)

    main_window = FileSearchApp()
    main_window.show()

    # Run until the loop is stopped; the context manager handles loop cleanup on exit.
    with event_loop:
        event_loop.run_forever()
|
|
||||||
|
|
@ -21,12 +21,12 @@ WORKDIR /app
|
||||||
|
|
||||||
ENV PYTHONPATH=/app
|
ENV PYTHONPATH=/app
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY pyproject.toml poetry.lock /app/
|
COPY ../pyproject.toml poetry.lock /app/
|
||||||
|
|
||||||
|
|
||||||
RUN pip install poetry
|
RUN pip install poetry
|
||||||
|
|
||||||
RUN poetry install --all-extras --no-root --without dev
|
RUN poetry install --all-extras --no-root --without dev
|
||||||
|
|
||||||
COPY cognee/ /app/cognee
|
COPY ../cognee /app/cognee
|
||||||
COPY README.md /app/README.md
|
COPY ../README.md /app/README.md
|
||||||
|
|
@ -4,8 +4,8 @@ services:
|
||||||
networks:
|
networks:
|
||||||
- cognee-network
|
- cognee-network
|
||||||
build:
|
build:
|
||||||
context: .
|
context: ..
|
||||||
dockerfile: Dockerfile
|
dockerfile: ../Dockerfile
|
||||||
volumes:
|
volumes:
|
||||||
- ./cognee:/app/cognee
|
- ./cognee:/app/cognee
|
||||||
- .env:/app/.env
|
- .env:/app/.env
|
||||||
|
|
@ -33,8 +33,8 @@ services:
|
||||||
profiles:
|
profiles:
|
||||||
- ui
|
- ui
|
||||||
build:
|
build:
|
||||||
context: ./cognee-frontend
|
context: ../cognee-frontend
|
||||||
dockerfile: Dockerfile
|
dockerfile: ../cognee-frontend/Dockerfile
|
||||||
volumes:
|
volumes:
|
||||||
- ./cognee-frontend/src:/app/src
|
- ./cognee-frontend/src:/app/src
|
||||||
- ./cognee-frontend/public:/app/public
|
- ./cognee-frontend/public:/app/public
|
||||||
|
|
@ -12,8 +12,8 @@ app = modal.App("cognee-runner")
|
||||||
|
|
||||||
image = (
|
image = (
|
||||||
modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
|
modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
|
||||||
.copy_local_file("pyproject.toml", "pyproject.toml")
|
.copy_local_file("../pyproject.toml", "pyproject.toml")
|
||||||
.copy_local_file("poetry.lock", "poetry.lock")
|
.copy_local_file("../poetry.lock", "poetry.lock")
|
||||||
.env({"ENV": os.getenv("ENV"), "LLM_API_KEY": os.getenv("LLM_API_KEY")})
|
.env({"ENV": os.getenv("ENV"), "LLM_API_KEY": os.getenv("LLM_API_KEY")})
|
||||||
.poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
|
.poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
|
||||||
.pip_install("protobuf", "h2")
|
.pip_install("protobuf", "h2")
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
# Third party licenses
|
|
||||||
|
|
||||||
This folder contains the licenses of third-party open-source software that has been redistributed in this project.
|
|
||||||
Details of included files and modifications can be found in [NOTICE](/NOTICE.md).
|
|
||||||
|
|
@ -1,62 +0,0 @@
|
||||||
import statistics
|
|
||||||
import time
|
|
||||||
import tracemalloc
|
|
||||||
from typing import Any, Callable, Dict
|
|
||||||
|
|
||||||
import psutil
|
|
||||||
|
|
||||||
|
|
||||||
def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
    """
    Benchmark a function for memory usage and computational performance.

    Args:
        func: Function to benchmark
        *args: Positional arguments to pass to the function
        num_runs: Number of times to run the benchmark

    Returns:
        Dictionary containing benchmark metrics: "mean_execution_time" (seconds),
        "mean_peak_memory_mb", "mean_cpu_percent" and "num_runs", plus
        "std_execution_time" when num_runs > 1.

    Raises:
        statistics.StatisticsError: If num_runs is smaller than 1, because no
            samples are collected to average.
    """
    execution_times = []
    peak_memory_usages = []
    cpu_percentages = []

    process = psutil.Process()

    for _ in range(num_runs):
        # Start memory tracking
        tracemalloc.start()
        try:
            # Measure execution time and CPU usage around the actual call
            start_time = time.perf_counter()
            start_cpu_time = process.cpu_times()

            func(*args)

            end_cpu_time = process.cpu_times()
            end_time = time.perf_counter()

            _, peak = tracemalloc.get_traced_memory()
        finally:
            # Stop tracing even if func raises, so tracing does not leak into later code
            tracemalloc.stop()

        # Calculate metrics
        execution_time = end_time - start_time
        cpu_time = (end_cpu_time.user + end_cpu_time.system) - (
            start_cpu_time.user + start_cpu_time.system
        )

        # Store results
        execution_times.append(execution_time)
        peak_memory_usages.append(peak / 1024 / 1024)  # Convert to MB
        # Guard against a zero-length interval for extremely fast calls
        cpu_percentages.append((cpu_time / execution_time) * 100 if execution_time > 0 else 0.0)

    analysis = {
        "mean_execution_time": statistics.mean(execution_times),
        "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
        "mean_cpu_percent": statistics.mean(cpu_percentages),
        "num_runs": num_runs,
    }

    # A standard deviation needs at least two samples
    if num_runs > 1:
        analysis["std_execution_time"] = statistics.stdev(execution_times)

    return analysis
|
|
||||||
|
|
@ -1,63 +0,0 @@
|
||||||
import argparse
|
|
||||||
import asyncio
|
|
||||||
|
|
||||||
from .benchmark_function import benchmark_function
|
|
||||||
|
|
||||||
from cognee.modules.graph.utils import get_graph_from_model
|
|
||||||
from cognee.tests.unit.interfaces.graph.util import (
|
|
||||||
PERSON_NAMES,
|
|
||||||
create_organization_recursive,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Example usage:
|
|
||||||
if __name__ == "__main__":

    def get_graph_from_model_sync(model):
        """Run get_graph_from_model to completion with fresh bookkeeping dicts."""
        return asyncio.run(
            get_graph_from_model(
                model,
                added_nodes={},
                added_edges={},
                visited_properties={},
            )
        )

    arg_parser = argparse.ArgumentParser(
        description="Benchmark graph model with configurable recursive depth"
    )
    arg_parser.add_argument(
        "--recursive-depth",
        type=int,
        default=3,
        help="Recursive depth for graph generation (default: 3)",
    )
    arg_parser.add_argument(
        "--runs", type=int, default=5, help="Number of benchmark runs (default: 5)"
    )
    args = arg_parser.parse_args()

    society = create_organization_recursive(
        "society", "Society", PERSON_NAMES, args.recursive_depth
    )

    # One un-timed conversion, used only to report the graph size below
    nodes, edges = get_graph_from_model_sync(society)

    results = benchmark_function(get_graph_from_model_sync, society, num_runs=args.runs)

    report_lines = [
        "\nBenchmark Results:",
        f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}",
        f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB",
        f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%",
        f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds",
    ]
    # The standard deviation is only present when more than one run was made
    if "std_execution_time" in results:
        report_lines.append(f"Execution Time Std: {results['std_execution_time']:.4f} seconds")

    for report_line in report_lines:
        print(report_line)
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
import numpy as np
|
|
||||||
from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
|
|
||||||
|
|
||||||
|
|
||||||
class DummyEmbeddingEngine(EmbeddingEngine):
    """Embedding engine stub that returns random vectors instead of real embeddings."""

    async def embed_text(self, text: list[str]) -> list[list[float]]:
        """Return one random vector of get_vector_size() floats per input string."""
        vector_size = self.get_vector_size()
        # tolist() converts numpy scalars to plain Python floats, matching the annotation
        return [np.random.randn(vector_size).tolist() for _ in text]

    def get_vector_size(self) -> int:
        """Return the dimensionality of the generated vectors."""
        return 3072
|
|
||||||
|
|
@ -1,59 +0,0 @@
|
||||||
from typing import Type
|
|
||||||
from uuid import uuid4
|
|
||||||
|
|
||||||
import spacy
|
|
||||||
import textacy
|
|
||||||
from pydantic import BaseModel
|
|
||||||
|
|
||||||
from cognee.infrastructure.llm.llm_interface import LLMInterface
|
|
||||||
from cognee.shared.data_models import Edge, KnowledgeGraph, Node, SummarizedContent
|
|
||||||
|
|
||||||
|
|
||||||
class DummyLLMAdapter(LLMInterface):
    """LLM stand-in that produces structured outputs locally instead of calling a model."""

    # spaCy pipeline loaded once at class-definition time and shared by all instances
    nlp = spacy.load("en_core_web_sm")

    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
    ) -> BaseModel:
        """
        Build a dummy instance of ``response_model`` from ``text_input``.

        ``system_prompt`` is accepted for interface compatibility and is not used.

        Raises:
            NotImplementedError: If ``response_model`` is neither SummarizedContent
                nor KnowledgeGraph.
        """
        # Compare the classes themselves rather than their repr strings
        if response_model is SummarizedContent:
            return dummy_summarize_content(text_input)
        if response_model is KnowledgeGraph:
            return dummy_extract_knowledge_graph(text_input, self.nlp)
        raise NotImplementedError(
            "Currently dummy acreate_structured_output is only implemented for SummarizedContent and KnowledgeGraph"
        )
|
|
||||||
|
|
||||||
|
|
||||||
def dummy_extract_knowledge_graph(text, nlp):
    """
    Build a KnowledgeGraph from the subject-verb-object triples textacy finds in ``text``.

    ``nlp`` is called on ``text`` to produce the document handed to textacy. Each
    distinct subject/object name becomes one node; each triple becomes one edge
    named after its verb.
    """
    doc = nlp(text)

    nodes = {}
    edges = []

    def node_for(name):
        # Create the node only on first sight so repeated names share one id
        # and no throwaway Node/UUID is built for names already known.
        if name not in nodes:
            nodes[name] = Node(id=str(uuid4()), name=name, type="object", description="")
        return nodes[name]

    for triple in textacy.extract.subject_verb_object_triples(doc):
        # Multi-element subjects/objects/verbs are joined into a single name
        source_node = node_for("_".join(str(e) for e in triple.subject))
        target_node = node_for("_".join(str(e) for e in triple.object))
        edges.append(
            Edge(
                source_node_id=source_node.id,
                target_node_id=target_node.id,
                relationship_name="_".join(str(e) for e in triple.verb),
            )
        )

    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)
|
|
||||||
|
|
||||||
|
|
||||||
def dummy_summarize_content(text):
    """
    Build a dummy SummarizedContent from the longest distinct words of ``text``.

    The summary joins the 50 longest distinct space-separated words and the
    description joins the 10 longest, both in descending length order.
    """
    # dict.fromkeys de-duplicates while keeping first-occurrence order; a set's
    # string ordering varies between interpreter runs (hash randomization), which
    # would make ties between equal-length words non-deterministic.
    unique_words = list(dict.fromkeys(text.split(" ")))
    # sorted() is stable, so equal-length words keep their order of first appearance
    longest_first = sorted(unique_words, key=len, reverse=True)

    summary = " ".join(longest_first[:50])
    description = " ".join(longest_first[:10])
    return SummarizedContent(summary=summary, description=description)
|
|
||||||