diff --git a/.data/code/example.txt b/.data/code/example.txt
new file mode 100644
index 000000000..4596a08eb
--- /dev/null
+++ b/.data/code/example.txt
@@ -0,0 +1,30 @@
+'''
+ Given a string, find the length of the longest substring without repeating characters.
+
+ Examples:
+
+ Given "abcabcbb", the answer is "abc", which has a length of 3.
+
+ Given "bbbbb", the answer is "b", with a length of 1.
+
+ Given "pwwkew", the answer is "wke", with a length of 3. Note that the answer must be a substring; "pwke" is a subsequence, not a substring.
+'''
+
+class Solution(object):
+ def lengthOfLongestSubstring(self, s):
+ """
+ :type s: str
+ :rtype: int
+ """
+ mapSet = {}
+ start, result = 0, 0
+
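+        # Sliding window: mapSet maps each character to the index just past
+        # its most recent occurrence; start is the left edge of the window.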
+ for end in range(len(s)):
+ if s[end] in mapSet:
+ start = max(mapSet[s[end]], start)
+ result = max(result, end-start+1)
+ mapSet[s[end]] = end+1
+
+ return result
diff --git a/.data/multimedia/example.png b/.data/multimedia/example.png
new file mode 100644
index 000000000..4d406cafd
Binary files /dev/null and b/.data/multimedia/example.png differ
diff --git a/.data/multimedia/text_to_speech.mp3 b/.data/multimedia/text_to_speech.mp3
new file mode 100644
index 000000000..e84aea505
Binary files /dev/null and b/.data/multimedia/text_to_speech.mp3 differ
diff --git a/.data/short_stories/soldiers-home.pdf b/.data/short_stories/soldiers-home.pdf
new file mode 100644
index 000000000..e453ca4bc
Binary files /dev/null and b/.data/short_stories/soldiers-home.pdf differ
diff --git a/.dlt/config.toml b/.dlt/config.toml
new file mode 100644
index 000000000..c72c145b5
--- /dev/null
+++ b/.dlt/config.toml
@@ -0,0 +1,6 @@
+# put your configuration values here
+
+[runtime]
+log_level = "WARNING" # the system log level of dlt
+# use the dlthub_telemetry setting to enable/disable anonymous usage data reporting, see https://dlthub.com/docs/telemetry
+dlthub_telemetry = false
diff --git a/.dockerignore b/.dockerignore
index 77a93d28a..d2d26277f 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,4 @@
-tools/bin
+bin
dist
docs
evals
diff --git a/.gitignore b/.gitignore
index 1bfd41dd8..c99e3a58e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-examples/.data
+.data
.env
.local.env
.prod.env
diff --git a/deployment/Dockerfile_modal b/Dockerfile_modal
similarity index 82%
rename from deployment/Dockerfile_modal
rename to Dockerfile_modal
index 579dfd7b9..f8ca663a8 100644
--- a/deployment/Dockerfile_modal
+++ b/Dockerfile_modal
@@ -21,12 +21,12 @@ WORKDIR /app
ENV PYTHONPATH=/app
WORKDIR /app
-COPY ../pyproject.toml poetry.lock /app/
+COPY pyproject.toml poetry.lock /app/
RUN pip install poetry
RUN poetry install --all-extras --no-root --without dev
-COPY ../cognee /app/cognee
-COPY ../README.md /app/README.md
+COPY cognee/ /app/cognee
+COPY README.md /app/README.md
diff --git a/README.md b/README.md
index 32522e19c..8c1994b99 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
🌐 Available Languages
:
- 🇵🇹 Português
+ 🇵🇹 Português
·
- 🇨🇳 [中文]
+ 🇨🇳 [中文]
diff --git a/alembic.ini b/alembic.ini
new file mode 100644
index 000000000..e7cb55ee6
--- /dev/null
+++ b/alembic.ini
@@ -0,0 +1,117 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+# Use forward slashes (/) also on windows to provide an os agnostic path
+script_location = alembic
+
+# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+# Uncomment the line below if you want the files to be prepended with date and time
+# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+# for all available tokens
+# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+# sys.path path, will be prepended to sys.path if present.
+# defaults to the current working directory.
+prepend_sys_path = .
+
+# timezone to use when rendering the date within the migration file
+# as well as the filename.
+# If specified, requires the python>=3.9 or backports.zoneinfo library.
+# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
+# string value is passed to ZoneInfo()
+# leave blank for localtime
+# timezone =
+
+# max length of characters to apply to the "slug" field
+# truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; This defaults
+# to alembic/versions. When using multiple version
+# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "version_path_separator" below.
+# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
+
+# version path separator; As mentioned above, this is the character used to split
+# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
+# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
+# Valid values for version_path_separator are:
+#
+# version_path_separator = :
+# version_path_separator = ;
+# version_path_separator = space
+# version_path_separator = newline
+version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
+
+# set to 'true' to search source files recursively
+# in each "version_locations" directory
+# new in Alembic version 1.10
+# recursive_version_locations = false
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+sqlalchemy.url = %(SQLALCHEMY_DATABASE_URI)s
+
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts. See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = %(here)s/.venv/bin/ruff
+# ruff.options = --fix REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/assets/cognee_benefits.png b/assets/cognee_benefits.png
index db1e1cc42..d435bed05 100644
Binary files a/assets/cognee_benefits.png and b/assets/cognee_benefits.png differ
diff --git a/tools/bin/dockerize b/bin/dockerize
similarity index 100%
rename from tools/bin/dockerize
rename to bin/dockerize
diff --git a/cognee-gui.py b/cognee-gui.py
new file mode 100644
index 000000000..e62a08380
--- /dev/null
+++ b/cognee-gui.py
@@ -0,0 +1,153 @@
+import sys
+import asyncio
+
+try:
+ import cognee
+ from PySide6.QtWidgets import (
+ QApplication,
+ QWidget,
+ QPushButton,
+ QLineEdit,
+ QFileDialog,
+ QVBoxLayout,
+ QHBoxLayout,
+ QLabel,
+ QMessageBox,
+ QTextEdit,
+ QProgressDialog,
+ )
+ from PySide6.QtCore import Qt
+
+ from qasync import QEventLoop # Import QEventLoop from qasync
+except ImportError as e:
+ print(
+ "\nPlease install Cognee with optional gui dependencies or manually install missing dependencies.\n"
+ )
+ print("\nTo install with poetry use:")
+ print("\npoetry install -E gui\n")
+ print("\nOr to install with poetry and all dependencies use:")
+ print("\npoetry install --all-extras\n")
+ print("\nTo install with pip use: ")
+ print('\npip install ".[gui]"\n')
+ raise e
+
+
+class FileSearchApp(QWidget):
+ def __init__(self):
+ super().__init__()
+ self.selected_file = None
+ self.init_ui()
+
+ def init_ui(self):
+ # Horizontal layout for file upload and visualization buttons
+ button_layout = QHBoxLayout()
+
+ # Button to open file dialog
+ self.file_button = QPushButton("Upload File to Cognee", parent=self)
+ self.file_button.clicked.connect(self.open_file_dialog)
+ button_layout.addWidget(self.file_button)
+
+ # Button to visualize data
+ self.visualize_button = QPushButton("Visualize Data", parent=self)
+ self.visualize_button.clicked.connect(lambda: asyncio.ensure_future(self.visualize_data()))
+ button_layout.addWidget(self.visualize_button)
+
+ # Label to display selected file path
+ self.file_label = QLabel("No file selected", parent=self)
+
+ # Line edit for search input
+ self.search_input = QLineEdit(parent=self)
+ self.search_input.setPlaceholderText("Enter text to search...")
+
+ # Button to perform search; schedule the async search on click
+ self.search_button = QPushButton("Cognee Search", parent=self)
+ self.search_button.clicked.connect(lambda: asyncio.ensure_future(self._cognee_search()))
+
+ # Text output area for search results
+ self.result_output = QTextEdit(parent=self)
+ self.result_output.setReadOnly(True)
+ self.result_output.setPlaceholderText("Search results will appear here...")
+
+ # Progress dialog
+        self.progress_dialog = QProgressDialog("Processing...", None, 0, 0, parent=self)
+ self.progress_dialog.setWindowModality(Qt.WindowModal)
+ self.progress_dialog.setCancelButton(None) # Remove the cancel button
+ self.progress_dialog.close()
+
+ # Layout setup
+ layout = QVBoxLayout()
+ layout.addLayout(button_layout)
+ layout.addWidget(self.file_label)
+ layout.addWidget(self.search_input)
+ layout.addWidget(self.search_button)
+ layout.addWidget(self.result_output)
+
+ self.setLayout(layout)
+ self.setWindowTitle("Cognee")
+ self.resize(500, 300)
+
+ def open_file_dialog(self):
+ file_path, _ = QFileDialog.getOpenFileName(
+ self, "Select a File", "", "All Files (*.*);;Text Files (*.txt)"
+ )
+ if file_path:
+ self.selected_file = file_path
+ self.file_label.setText(f"Selected: {file_path}")
+ asyncio.ensure_future(self.process_file_async())
+
+ async def process_file_async(self):
+ """Asynchronously add and process the selected file."""
+ # Disable the entire window
+ self.progress_dialog.show()
+ self.setEnabled(False)
+ try:
+ await cognee.add(self.selected_file)
+ await cognee.cognify()
+ except Exception as e:
+ QMessageBox.critical(self, "Error", f"File processing failed: {str(e)}")
+ # Once finished, re-enable the window
+ self.setEnabled(True)
+ self.progress_dialog.close()
+
+ async def _cognee_search(self):
+ """Performs an async search and updates the result output."""
+ # Disable the entire window
+ self.setEnabled(False)
+ self.progress_dialog.show()
+
+ try:
+ search_text = self.search_input.text().strip()
+ result = await cognee.search(query_text=search_text)
+ print(result)
+ # Assuming result is a list-like object; adjust if necessary
+            self.result_output.setText(str(result[0]))
+ except Exception as e:
+ QMessageBox.critical(self, "Error", f"Search failed: {str(e)}")
+
+ # Once finished, re-enable the window
+ self.setEnabled(True)
+ self.progress_dialog.close()
+
+ async def visualize_data(self):
+ """Async slot for handling visualize data button press."""
+ import webbrowser
+ from cognee.api.v1.visualize.visualize import visualize_graph
+ import os
+ import pathlib
+
+ html_file = os.path.join(pathlib.Path(__file__).parent, ".data", "graph_visualization.html")
+ await visualize_graph(html_file)
+ webbrowser.open(f"file://{html_file}")
+
+
+if __name__ == "__main__":
+ app = QApplication(sys.argv)
+ # Create a qasync event loop and set it as the current event loop
+ loop = QEventLoop(app)
+ asyncio.set_event_loop(loop)
+
+ window = FileSearchApp()
+ window.show()
+
+ with loop:
+ loop.run_forever()
diff --git a/assets/community/README.zh.md b/community/README.zh.md
similarity index 100%
rename from assets/community/README.zh.md
rename to community/README.zh.md
diff --git a/assets/community/cognee_benefits_zh.JPG b/community/cognee_benefits_zh.JPG
similarity index 100%
rename from assets/community/cognee_benefits_zh.JPG
rename to community/cognee_benefits_zh.JPG
diff --git a/assets/community/cognee_diagram_zh.JPG b/community/cognee_diagram_zh.JPG
similarity index 100%
rename from assets/community/cognee_diagram_zh.JPG
rename to community/cognee_diagram_zh.JPG
diff --git a/deployment/docker-compose.yml b/docker-compose.yml
similarity index 95%
rename from deployment/docker-compose.yml
rename to docker-compose.yml
index 81773eb28..91e3291b7 100644
--- a/deployment/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,8 +4,8 @@ services:
networks:
- cognee-network
build:
- context: ..
- dockerfile: ../Dockerfile
+ context: .
+ dockerfile: Dockerfile
volumes:
- ./cognee:/app/cognee
- .env:/app/.env
@@ -33,8 +33,8 @@ services:
profiles:
- ui
build:
- context: ../cognee-frontend
- dockerfile: ../cognee-frontend/Dockerfile
+ context: ./cognee-frontend
+ dockerfile: Dockerfile
volumes:
- ./cognee-frontend/src:/app/src
- ./cognee-frontend/public:/app/public
diff --git a/deployment/entrypoint.sh b/entrypoint.sh
similarity index 100%
rename from deployment/entrypoint.sh
rename to entrypoint.sh
diff --git a/deployment/helm/Chart.yaml b/helm/Chart.yaml
similarity index 100%
rename from deployment/helm/Chart.yaml
rename to helm/Chart.yaml
diff --git a/deployment/helm/Dockerfile b/helm/Dockerfile
similarity index 100%
rename from deployment/helm/Dockerfile
rename to helm/Dockerfile
diff --git a/deployment/helm/README.md b/helm/README.md
similarity index 100%
rename from deployment/helm/README.md
rename to helm/README.md
diff --git a/deployment/helm/docker-compose-helm.yml b/helm/docker-compose-helm.yml
similarity index 100%
rename from deployment/helm/docker-compose-helm.yml
rename to helm/docker-compose-helm.yml
diff --git a/deployment/helm/templates/cognee_deployment.yaml b/helm/templates/cognee_deployment.yaml
similarity index 100%
rename from deployment/helm/templates/cognee_deployment.yaml
rename to helm/templates/cognee_deployment.yaml
diff --git a/deployment/helm/templates/cognee_service.yaml b/helm/templates/cognee_service.yaml
similarity index 100%
rename from deployment/helm/templates/cognee_service.yaml
rename to helm/templates/cognee_service.yaml
diff --git a/deployment/helm/templates/postgres_deployment.yaml b/helm/templates/postgres_deployment.yaml
similarity index 100%
rename from deployment/helm/templates/postgres_deployment.yaml
rename to helm/templates/postgres_deployment.yaml
diff --git a/deployment/helm/templates/postgres_pvc.yaml b/helm/templates/postgres_pvc.yaml
similarity index 100%
rename from deployment/helm/templates/postgres_pvc.yaml
rename to helm/templates/postgres_pvc.yaml
diff --git a/deployment/helm/templates/postgres_service.yaml b/helm/templates/postgres_service.yaml
similarity index 100%
rename from deployment/helm/templates/postgres_service.yaml
rename to helm/templates/postgres_service.yaml
diff --git a/deployment/helm/values.yaml b/helm/values.yaml
similarity index 100%
rename from deployment/helm/values.yaml
rename to helm/values.yaml
diff --git a/licenses/README.md b/licenses/README.md
new file mode 100644
index 000000000..8b3d13963
--- /dev/null
+++ b/licenses/README.md
@@ -0,0 +1,4 @@
+# Third party licenses
+
+This folder contains the licenses of third-party open-source software that has been redistributed in this project.
+Details of included files and modifications can be found in [NOTICE](/NOTICE.md).
diff --git a/deployment/modal_deployment.py b/modal_deployment.py
similarity index 97%
rename from deployment/modal_deployment.py
rename to modal_deployment.py
index cf1cf32e3..4c2ff7d5d 100644
--- a/deployment/modal_deployment.py
+++ b/modal_deployment.py
@@ -12,8 +12,8 @@ app = modal.App("cognee-runner")
image = (
modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
- .copy_local_file("../pyproject.toml", "pyproject.toml")
- .copy_local_file("../poetry.lock", "poetry.lock")
+ .copy_local_file("pyproject.toml", "pyproject.toml")
+ .copy_local_file("poetry.lock", "poetry.lock")
.env({"ENV": os.getenv("ENV"), "LLM_API_KEY": os.getenv("LLM_API_KEY")})
.poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
.pip_install("protobuf", "h2")
diff --git a/profiling/graph_pydantic_conversion/benchmark_function.py b/profiling/graph_pydantic_conversion/benchmark_function.py
new file mode 100644
index 000000000..a4f5c839b
--- /dev/null
+++ b/profiling/graph_pydantic_conversion/benchmark_function.py
@@ -0,0 +1,72 @@
+import statistics
+import time
+import tracemalloc
+from typing import Any, Callable, Dict
+
+import psutil
+
+
+def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
+ """
+ Benchmark a function for memory usage and computational performance.
+
+ Args:
+ func: Function to benchmark
+ *args: Arguments to pass to the function
+ num_runs: Number of times to run the benchmark
+
+ Returns:
+ Dictionary containing benchmark metrics
+ """
+ execution_times = []
+ peak_memory_usages = []
+ cpu_percentages = []
+
+ process = psutil.Process()
+
+ for _ in range(num_runs):
+ # Start memory tracking
+ tracemalloc.start()
+
+ # Measure execution time and CPU usage
+ start_time = time.perf_counter()
+ start_cpu_time = process.cpu_times()
+
+        # Run the function under test; this call is what the readings measure
+        func(*args)
+
+ end_cpu_time = process.cpu_times()
+ end_time = time.perf_counter()
+
+ # Calculate metrics
+ execution_time = end_time - start_time
+ cpu_time = (end_cpu_time.user + end_cpu_time.system) - (
+ start_cpu_time.user + start_cpu_time.system
+ )
+ current, peak = tracemalloc.get_traced_memory()
+
+ # Store results
+ execution_times.append(execution_time)
+ peak_memory_usages.append(peak / 1024 / 1024) # Convert to MB
+ cpu_percentages.append((cpu_time / execution_time) * 100)
+
+ tracemalloc.stop()
+
+ analysis = {
+ "mean_execution_time": statistics.mean(execution_times),
+ "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
+ "mean_cpu_percent": statistics.mean(cpu_percentages),
+ "num_runs": num_runs,
+ }
+
+ if num_runs > 1:
+ analysis["std_execution_time"] = statistics.stdev(execution_times)
+
+ return analysis
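+
+
+if __name__ == "__main__":
+    # Minimal usage sketch with an illustrative workload (not part of the
+    # profiling suite): benchmark summing one million integers, three runs.
+    stats = benchmark_function(sum, list(range(1_000_000)), num_runs=3)
+    print(stats)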
diff --git a/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
new file mode 100644
index 000000000..c1c0b6756
--- /dev/null
+++ b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
@@ -0,0 +1,65 @@
+import argparse
+import asyncio
+
+from benchmark_function import benchmark_function
+
+from cognee.modules.graph.utils import get_graph_from_model
+from cognee.tests.unit.interfaces.graph.util import (
+ PERSON_NAMES,
+ create_organization_recursive,
+)
+
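+# Example invocation (from the repository root, using the path this diff adds):
+#   python profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py --recursive-depth 3 --runs 5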
+# Example usage:
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Benchmark graph model with configurable recursive depth"
+ )
+ parser.add_argument(
+ "--recursive-depth",
+ type=int,
+ default=3,
+ help="Recursive depth for graph generation (default: 3)",
+ )
+ parser.add_argument("--runs", type=int, default=5, help="Number of benchmark runs (default: 5)")
+ args = parser.parse_args()
+
+ society = create_organization_recursive(
+ "society", "Society", PERSON_NAMES, args.recursive_depth
+ )
+ added_nodes = {}
+ added_edges = {}
+ visited_properties = {}
+ nodes, edges = asyncio.run(
+ get_graph_from_model(
+ society,
+ added_nodes=added_nodes,
+ added_edges=added_edges,
+ visited_properties=visited_properties,
+ )
+ )
+
+ def get_graph_from_model_sync(model):
+ added_nodes = {}
+ added_edges = {}
+ visited_properties = {}
+
+ return asyncio.run(
+ get_graph_from_model(
+ model,
+ added_nodes=added_nodes,
+ added_edges=added_edges,
+ visited_properties=visited_properties,
+ )
+ )
+
+ results = benchmark_function(get_graph_from_model_sync, society, num_runs=args.runs)
+ print("\nBenchmark Results:")
+ print(f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}")
+ print(f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB")
+ print(f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%")
+ print(f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds")
+
+ if "std_execution_time" in results:
+ print(f"Execution Time Std: {results['std_execution_time']:.4f} seconds")
diff --git a/profiling/util/DummyEmbeddingEngine.py b/profiling/util/DummyEmbeddingEngine.py
new file mode 100644
index 000000000..0ba742182
--- /dev/null
+++ b/profiling/util/DummyEmbeddingEngine.py
@@ -0,0 +1,12 @@
+import numpy as np
+from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
+
+
+class DummyEmbeddingEngine(EmbeddingEngine):
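+    """Stub engine that returns random 3072-dimensional vectors so profiling
+    runs need no real embedding service."""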
+ async def embed_text(self, text: list[str]) -> list[list[float]]:
+        return [np.random.randn(3072).tolist() for _ in text]
+
+ def get_vector_size(self) -> int:
+ return 3072
diff --git a/profiling/util/DummyLLMAdapter.py b/profiling/util/DummyLLMAdapter.py
new file mode 100644
index 000000000..b28261665
--- /dev/null
+++ b/profiling/util/DummyLLMAdapter.py
@@ -0,0 +1,62 @@
+from typing import Type
+from uuid import uuid4
+
+import spacy
+import textacy
+from pydantic import BaseModel
+
+from cognee.infrastructure.llm.llm_interface import LLMInterface
+from cognee.shared.data_models import Edge, KnowledgeGraph, Node, SummarizedContent
+
+
+class DummyLLMAdapter(LLMInterface):
+ nlp = spacy.load("en_core_web_sm")
+
+ async def acreate_structured_output(
+ self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+ ) -> BaseModel:
+        if response_model is SummarizedContent:
+            return dummy_summarize_content(text_input)
+        elif response_model is KnowledgeGraph:
+            return dummy_extract_knowledge_graph(text_input, self.nlp)
+        else:
+            raise Exception(
+                "Currently dummy acreate_structured_output is only implemented for SummarizedContent and KnowledgeGraph"
+            )
+
+
+def dummy_extract_knowledge_graph(text, nlp):
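+    # Build a toy knowledge graph from textacy subject-verb-object triples.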
+ doc = nlp(text)
+ triples = list(textacy.extract.subject_verb_object_triples(doc))
+
+ nodes = {}
+ edges = []
+ for triple in triples:
+ source = "_".join([str(e) for e in triple.subject])
+ target = "_".join([str(e) for e in triple.object])
+ nodes[source] = nodes.get(
+ source, Node(id=str(uuid4()), name=source, type="object", description="")
+ )
+ nodes[target] = nodes.get(
+ target, Node(id=str(uuid4()), name=target, type="object", description="")
+ )
+ edge_type = "_".join([str(e) for e in triple.verb])
+ edges.append(
+ Edge(
+ source_node_id=nodes[source].id,
+ target_node_id=nodes[target].id,
+ relationship_name=edge_type,
+ )
+ )
+ return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)
+
+
+def dummy_summarize_content(text):
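+    # Crude LLM stand-in: the 50 longest unique words become the "summary",
+    # the 10 longest the "description".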
+ words = [(word, len(word)) for word in set(text.split(" "))]
+ words = sorted(words, key=lambda x: x[1], reverse=True)
+ summary = " ".join([word for word, _ in words[:50]])
+ description = " ".join([word for word, _ in words[:10]])
+ return SummarizedContent(summary=summary, description=description)