diff --git a/.data/code/example.txt b/.data/code/example.txt
deleted file mode 100644
index 4596a08eb..000000000
--- a/.data/code/example.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-'''
-    Given a string, find the length of the longest substring without repeating characters.
-
-    Examples:
-
-    Given "abcabcbb", the answer is "abc", which the length is 3.
-
-    Given "bbbbb", the answer is "b", with the length of 1.
-
-    Given "pwwkew", the answer is "wke", with the length of 3. Note that the answer must be a substring, "pwke" is a subsequence and not a substring.
-'''
-
-class Solution(object):
-    def lengthOfLongestSubstring(self, s):
-        """
-        :type s: str
-        :rtype: int
-        """
-        mapSet = {}
-        start, result = 0, 0
-
-        for end in range(len(s)):
-            if s[end] in mapSet:
-                start = max(mapSet[s[end]], start)
-            result = max(result, end-start+1)
-            mapSet[s[end]] = end+1
-
-        return result
diff --git a/.data/multimedia/example.png b/.data/multimedia/example.png
deleted file mode 100644
index 4d406cafd..000000000
Binary files a/.data/multimedia/example.png and /dev/null differ
diff --git a/.data/multimedia/text_to_speech.mp3 b/.data/multimedia/text_to_speech.mp3
deleted file mode 100644
index e84aea505..000000000
Binary files a/.data/multimedia/text_to_speech.mp3 and /dev/null differ
diff --git a/.data/short_stories/soldiers-home.pdf b/.data/short_stories/soldiers-home.pdf
deleted file mode 100644
index e453ca4bc..000000000
Binary files a/.data/short_stories/soldiers-home.pdf and /dev/null differ
diff --git a/.dlt/config.toml b/.dlt/config.toml
deleted file mode 100644
index c72c145b5..000000000
--- a/.dlt/config.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-# put your configuration values here
-
-[runtime]
-log_level = "WARNING" # the system log level of dlt
-# use the dlthub_telemetry setting to enable/disable anonymous usage data reporting, see https://dlthub.com/docs/telemetry
-dlthub_telemetry = false
diff --git a/.dockerignore b/.dockerignore
index d2d26277f..77a93d28a 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,4 @@
-bin
+tools/bin
 dist
 docs
 evals
diff --git a/.gitignore b/.gitignore
index c99e3a58e..1bfd41dd8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-.data
+examples/.data
 .env
 .local.env
 .prod.env
diff --git a/README.md b/README.md
index 8c1994b99..32522e19c 100644
--- a/README.md
+++ b/README.md
@@ -35,9 +35,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github

 🌐 Available Languages :
-🇵🇹 Português
+🇵🇹 Português ·
-🇨🇳 [中文]
+🇨🇳 [中文]

diff --git a/alembic.ini b/alembic.ini
deleted file mode 100644
index e7cb55ee6..000000000
--- a/alembic.ini
+++ /dev/null
@@ -1,117 +0,0 @@
-# A generic, single database configuration.
-
-[alembic]
-# path to migration scripts
-# Use forward slashes (/) also on windows to provide an os agnostic path
-script_location = alembic
-
-# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
-# Uncomment the line below if you want the files to be prepended with date and time
-# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
-# for all available tokens
-# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
-
-# sys.path path, will be prepended to sys.path if present.
-# defaults to the current working directory.
-prepend_sys_path = .
-
-# timezone to use when rendering the date within the migration file
-# as well as the filename.
-# If specified, requires the python>=3.9 or backports.zoneinfo library.
-# Any required deps can installed by adding `alembic[tz]` to the pip requirements
-# string value is passed to ZoneInfo()
-# leave blank for localtime
-# timezone =
-
-# max length of characters to apply to the "slug" field
-# truncate_slug_length = 40
-
-# set to 'true' to run the environment during
-# the 'revision' command, regardless of autogenerate
-# revision_environment = false
-
-# set to 'true' to allow .pyc and .pyo files without
-# a source .py file to be detected as revisions in the
-# versions/ directory
-# sourceless = false
-
-# version location specification; This defaults
-# to alembic/versions. When using multiple version
-# directories, initial revisions must be specified with --version-path.
-# The path separator used here should be the separator specified by "version_path_separator" below.
-# version_locations = %(here)s/bar:%(here)s/bat:alembic/versions
-
-# version path separator; As mentioned above, this is the character used to split
-# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
-# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
-# Valid values for version_path_separator are:
-#
-# version_path_separator = :
-# version_path_separator = ;
-# version_path_separator = space
-# version_path_separator = newline
-version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
-
-# set to 'true' to search source files recursively
-# in each "version_locations" directory
-# new in Alembic version 1.10
-# recursive_version_locations = false
-
-# the output encoding used when revision files
-# are written from script.py.mako
-# output_encoding = utf-8
-
-sqlalchemy.url = %(SQLALCHEMY_DATABASE_URI)s
-
-
-[post_write_hooks]
-# post_write_hooks defines scripts or Python functions that are run
-# on newly generated revision scripts. See the documentation for further
-# detail and examples
-
-# format using "black" - use the console_scripts runner, against the "black" entrypoint
-# hooks = black
-# black.type = console_scripts
-# black.entrypoint = black
-# black.options = -l 79 REVISION_SCRIPT_FILENAME
-
-# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
-# hooks = ruff
-# ruff.type = exec
-# ruff.executable = %(here)s/.venv/bin/ruff
-# ruff.options = --fix REVISION_SCRIPT_FILENAME
-
-# Logging configuration
-[loggers]
-keys = root,sqlalchemy,alembic
-
-[handlers]
-keys = console
-
-[formatters]
-keys = generic
-
-[logger_root]
-level = WARN
-handlers = console
-qualname =
-
-[logger_sqlalchemy]
-level = WARN
-handlers =
-qualname = sqlalchemy.engine
-
-[logger_alembic]
-level = INFO
-handlers =
-qualname = alembic
-
-[handler_console]
-class = StreamHandler
-args = (sys.stderr,)
-level = NOTSET
-formatter = generic
-
-[formatter_generic]
-format = %(levelname)-5.5s [%(name)s] %(message)s
-datefmt = %H:%M:%S
diff --git a/assets/cognee_benefits.png b/assets/cognee_benefits.png
index d435bed05..db1e1cc42 100644
Binary files a/assets/cognee_benefits.png and b/assets/cognee_benefits.png differ
diff --git a/community/README.zh.md b/assets/community/README.zh.md
similarity index 100%
rename from community/README.zh.md
rename to assets/community/README.zh.md
diff --git a/community/cognee_benefits_zh.JPG b/assets/community/cognee_benefits_zh.JPG
similarity index 100%
rename from community/cognee_benefits_zh.JPG
rename to assets/community/cognee_benefits_zh.JPG
diff --git a/community/cognee_diagram_zh.JPG b/assets/community/cognee_diagram_zh.JPG
similarity index 100%
rename from community/cognee_diagram_zh.JPG
rename to assets/community/cognee_diagram_zh.JPG
diff --git a/cognee-gui.py b/cognee-gui.py
deleted file mode 100644
index e62a08380..000000000
--- a/cognee-gui.py
+++ /dev/null
@@ -1,153 +0,0 @@
-import sys
-import asyncio
-
-try:
-    import cognee
-    from PySide6.QtWidgets import (
-        QApplication,
-        QWidget,
-        QPushButton,
-        QLineEdit,
-        QFileDialog,
-        QVBoxLayout,
-        QHBoxLayout,
-        QLabel,
-        QMessageBox,
-        QTextEdit,
-        QProgressDialog,
-    )
-    from PySide6.QtCore import Qt
-
-    from qasync import QEventLoop  # Import QEventLoop from qasync
-except ImportError as e:
-    print(
-        "\nPlease install Cognee with optional gui dependencies or manually install missing dependencies.\n"
-    )
-    print("\nTo install with poetry use:")
-    print("\npoetry install -E gui\n")
-    print("\nOr to install with poetry and all dependencies use:")
-    print("\npoetry install --all-extras\n")
-    print("\nTo install with pip use: ")
-    print('\npip install ".[gui]"\n')
-    raise e
-
-
-class FileSearchApp(QWidget):
-    def __init__(self):
-        super().__init__()
-        self.selected_file = None
-        self.init_ui()
-
-    def init_ui(self):
-        # Horizontal layout for file upload and visualization buttons
-        button_layout = QHBoxLayout()
-
-        # Button to open file dialog
-        self.file_button = QPushButton("Upload File to Cognee", parent=self)
-        self.file_button.clicked.connect(self.open_file_dialog)
-        button_layout.addWidget(self.file_button)
-
-        # Button to visualize data
-        self.visualize_button = QPushButton("Visualize Data", parent=self)
-        self.visualize_button.clicked.connect(lambda: asyncio.ensure_future(self.visualize_data()))
-        button_layout.addWidget(self.visualize_button)
-
-        # Label to display selected file path
-        self.file_label = QLabel("No file selected", parent=self)
-
-        # Line edit for search input
-        self.search_input = QLineEdit(parent=self)
-        self.search_input.setPlaceholderText("Enter text to search...")
-
-        # Button to perform search; schedule the async search on click
-        self.search_button = QPushButton("Cognee Search", parent=self)
-        self.search_button.clicked.connect(lambda: asyncio.ensure_future(self._cognee_search()))
-
-        # Text output area for search results
-        self.result_output = QTextEdit(parent=self)
-        self.result_output.setReadOnly(True)
-        self.result_output.setPlaceholderText("Search results will appear here...")
-
-        # Progress dialog
-        self.progress_dialog = QProgressDialog("Processing..", None, 0, 0, parent=self)
-        self.progress_dialog.setWindowModality(Qt.WindowModal)
-        self.progress_dialog.setCancelButton(None)  # Remove the cancel button
-        self.progress_dialog.close()

-        # Layout setup
-        layout = QVBoxLayout()
-        layout.addLayout(button_layout)
-        layout.addWidget(self.file_label)
-        layout.addWidget(self.search_input)
-        layout.addWidget(self.search_button)
-        layout.addWidget(self.result_output)
-
-        self.setLayout(layout)
-        self.setWindowTitle("Cognee")
-        self.resize(500, 300)
-
-    def open_file_dialog(self):
-        file_path, _ = QFileDialog.getOpenFileName(
-            self, "Select a File", "", "All Files (*.*);;Text Files (*.txt)"
-        )
-        if file_path:
-            self.selected_file = file_path
-            self.file_label.setText(f"Selected: {file_path}")
-            asyncio.ensure_future(self.process_file_async())
-
-    async def process_file_async(self):
-        """Asynchronously add and process the selected file."""
-        # Disable the entire window
-        self.progress_dialog.show()
-        self.setEnabled(False)
-        try:
-            await cognee.add(self.selected_file)
-            await cognee.cognify()
-        except Exception as e:
-            QMessageBox.critical(self, "Error", f"File processing failed: {str(e)}")
-        # Once finished, re-enable the window
-        self.setEnabled(True)
-        self.progress_dialog.close()
-
-    async def _cognee_search(self):
-        """Performs an async search and updates the result output."""
-        # Disable the entire window
-        self.setEnabled(False)
-        self.progress_dialog.show()
-
-        try:
-            search_text = self.search_input.text().strip()
-            result = await cognee.search(query_text=search_text)
-            print(result)
-            # Assuming result is a list-like object; adjust if necessary
-            self.result_output.setText(result[0])
-        except Exception as e:
-            QMessageBox.critical(self, "Error", f"Search failed: {str(e)}")
-
-        # Once finished, re-enable the window
-        self.setEnabled(True)
-        self.progress_dialog.close()
-
-    async def visualize_data(self):
-        """Async slot for handling visualize data button press."""
-        import webbrowser
-        from cognee.api.v1.visualize.visualize import visualize_graph
-        import os
-        import pathlib
-
-        html_file = os.path.join(pathlib.Path(__file__).parent, ".data", "graph_visualization.html")
-        await visualize_graph(html_file)
-        webbrowser.open(f"file://{html_file}")
-
-
-if __name__ == "__main__":
-    app = QApplication(sys.argv)
-    # Create a qasync event loop and set it as the current event loop
-    loop = QEventLoop(app)
-    asyncio.set_event_loop(loop)
-
-    window = FileSearchApp()
-    window.show()
-
-    with loop:
-        loop.run_forever()
diff --git a/Dockerfile_modal b/deployment/Dockerfile_modal
similarity index 82%
rename from Dockerfile_modal
rename to deployment/Dockerfile_modal
index f8ca663a8..579dfd7b9 100644
--- a/Dockerfile_modal
+++ b/deployment/Dockerfile_modal
@@ -21,12 +21,12 @@ WORKDIR /app
 ENV PYTHONPATH=/app
 
 WORKDIR /app
 
-COPY pyproject.toml poetry.lock /app/
+COPY ../pyproject.toml poetry.lock /app/
 
 RUN pip install poetry
 
 RUN poetry install --all-extras --no-root --without dev
 
-COPY cognee/ /app/cognee
-COPY README.md /app/README.md
+COPY ../cognee /app/cognee
+COPY ../README.md /app/README.md
diff --git a/docker-compose.yml b/deployment/docker-compose.yml
similarity index 95%
rename from docker-compose.yml
rename to deployment/docker-compose.yml
index 91e3291b7..81773eb28 100644
--- a/docker-compose.yml
+++ b/deployment/docker-compose.yml
@@ -4,8 +4,8 @@ services:
     networks:
       - cognee-network
     build:
-      context: .
-      dockerfile: Dockerfile
+      context: ..
+      dockerfile: ../Dockerfile
     volumes:
      - ./cognee:/app/cognee
      - .env:/app/.env
@@ -33,8 +33,8 @@
     profiles:
       - ui
     build:
-      context: ./cognee-frontend
-      dockerfile: Dockerfile
+      context: ../cognee-frontend
+      dockerfile: ../cognee-frontend/Dockerfile
     volumes:
      - ./cognee-frontend/src:/app/src
      - ./cognee-frontend/public:/app/public
diff --git a/entrypoint.sh b/deployment/entrypoint.sh
similarity index 100%
rename from entrypoint.sh
rename to deployment/entrypoint.sh
diff --git a/helm/Chart.yaml b/deployment/helm/Chart.yaml
similarity index 100%
rename from helm/Chart.yaml
rename to deployment/helm/Chart.yaml
diff --git a/helm/Dockerfile b/deployment/helm/Dockerfile
similarity index 100%
rename from helm/Dockerfile
rename to deployment/helm/Dockerfile
diff --git a/helm/README.md b/deployment/helm/README.md
similarity index 100%
rename from helm/README.md
rename to deployment/helm/README.md
diff --git a/helm/docker-compose-helm.yml b/deployment/helm/docker-compose-helm.yml
similarity index 100%
rename from helm/docker-compose-helm.yml
rename to deployment/helm/docker-compose-helm.yml
diff --git a/helm/templates/cognee_deployment.yaml b/deployment/helm/templates/cognee_deployment.yaml
similarity index 100%
rename from helm/templates/cognee_deployment.yaml
rename to deployment/helm/templates/cognee_deployment.yaml
diff --git a/helm/templates/cognee_service.yaml b/deployment/helm/templates/cognee_service.yaml
similarity index 100%
rename from helm/templates/cognee_service.yaml
rename to deployment/helm/templates/cognee_service.yaml
diff --git a/helm/templates/postgres_deployment.yaml b/deployment/helm/templates/postgres_deployment.yaml
similarity index 100%
rename from helm/templates/postgres_deployment.yaml
rename to deployment/helm/templates/postgres_deployment.yaml
diff --git a/helm/templates/postgres_pvc.yaml b/deployment/helm/templates/postgres_pvc.yaml
similarity index 100%
rename from helm/templates/postgres_pvc.yaml
rename to deployment/helm/templates/postgres_pvc.yaml
diff --git a/helm/templates/postgres_service.yaml b/deployment/helm/templates/postgres_service.yaml
similarity index 100%
rename from helm/templates/postgres_service.yaml
rename to deployment/helm/templates/postgres_service.yaml
diff --git a/helm/values.yaml b/deployment/helm/values.yaml
similarity index 100%
rename from helm/values.yaml
rename to deployment/helm/values.yaml
diff --git a/modal_deployment.py b/deployment/modal_deployment.py
similarity index 97%
rename from modal_deployment.py
rename to deployment/modal_deployment.py
index 4c2ff7d5d..cf1cf32e3 100644
--- a/modal_deployment.py
+++ b/deployment/modal_deployment.py
@@ -12,8 +12,8 @@ app = modal.App("cognee-runner")
 
 image = (
     modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
-    .copy_local_file("pyproject.toml", "pyproject.toml")
-    .copy_local_file("poetry.lock", "poetry.lock")
+    .copy_local_file("../pyproject.toml", "pyproject.toml")
+    .copy_local_file("../poetry.lock", "poetry.lock")
     .env({"ENV": os.getenv("ENV"), "LLM_API_KEY": os.getenv("LLM_API_KEY")})
     .poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
     .pip_install("protobuf", "h2")
diff --git a/licenses/README.md b/licenses/README.md
deleted file mode 100644
index 8b3d13963..000000000
--- a/licenses/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Third party licenses
-
-This folder contains the licenses of third-party open-source software that has been redistributed in this project.
-Details of included files and modifications can be found in [NOTICE](/NOTICE.md).
diff --git a/profiling/graph_pydantic_conversion/benchmark_function.py b/profiling/graph_pydantic_conversion/benchmark_function.py
deleted file mode 100644
index a4f5c839b..000000000
--- a/profiling/graph_pydantic_conversion/benchmark_function.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import statistics
-import time
-import tracemalloc
-from typing import Any, Callable, Dict
-
-import psutil
-
-
-def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
-    """
-    Benchmark a function for memory usage and computational performance.
-
-    Args:
-        func: Function to benchmark
-        *args: Arguments to pass to the function
-        num_runs: Number of times to run the benchmark
-
-    Returns:
-        Dictionary containing benchmark metrics
-    """
-    execution_times = []
-    peak_memory_usages = []
-    cpu_percentages = []
-
-    process = psutil.Process()
-
-    for _ in range(num_runs):
-        # Start memory tracking
-        tracemalloc.start()
-
-        # Measure execution time and CPU usage
-        start_time = time.perf_counter()
-        start_cpu_time = process.cpu_times()
-
-        end_cpu_time = process.cpu_times()
-        end_time = time.perf_counter()
-
-        # Calculate metrics
-        execution_time = end_time - start_time
-        cpu_time = (end_cpu_time.user + end_cpu_time.system) - (
-            start_cpu_time.user + start_cpu_time.system
-        )
-        current, peak = tracemalloc.get_traced_memory()
-
-        # Store results
-        execution_times.append(execution_time)
-        peak_memory_usages.append(peak / 1024 / 1024)  # Convert to MB
-        cpu_percentages.append((cpu_time / execution_time) * 100)
-
-        tracemalloc.stop()
-
-    analysis = {
-        "mean_execution_time": statistics.mean(execution_times),
-        "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
-        "mean_cpu_percent": statistics.mean(cpu_percentages),
-        "num_runs": num_runs,
-    }
-
-    if num_runs > 1:
-        analysis["std_execution_time"] = statistics.stdev(execution_times)
-
-    return analysis
diff --git a/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
deleted file mode 100644
index c1c0b6756..000000000
--- a/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import argparse
-import asyncio
-
-from .benchmark_function import benchmark_function
-
-from cognee.modules.graph.utils import get_graph_from_model
-from cognee.tests.unit.interfaces.graph.util import (
-    PERSON_NAMES,
-    create_organization_recursive,
-)
-
-# Example usage:
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Benchmark graph model with configurable recursive depth"
-    )
-    parser.add_argument(
-        "--recursive-depth",
-        type=int,
-        default=3,
-        help="Recursive depth for graph generation (default: 3)",
-    )
-    parser.add_argument("--runs", type=int, default=5, help="Number of benchmark runs (default: 5)")
-    args = parser.parse_args()
-
-    society = create_organization_recursive(
-        "society", "Society", PERSON_NAMES, args.recursive_depth
-    )
-    added_nodes = {}
-    added_edges = {}
-    visited_properties = {}
-    nodes, edges = asyncio.run(
-        get_graph_from_model(
-            society,
-            added_nodes=added_nodes,
-            added_edges=added_edges,
-            visited_properties=visited_properties,
-        )
-    )
-
-    def get_graph_from_model_sync(model):
-        added_nodes = {}
-        added_edges = {}
-        visited_properties = {}
-
-        return asyncio.run(
-            get_graph_from_model(
-                model,
-                added_nodes=added_nodes,
-                added_edges=added_edges,
-                visited_properties=visited_properties,
-            )
-        )
-
-    results = benchmark_function(get_graph_from_model_sync, society, num_runs=args.runs)
-    print("\nBenchmark Results:")
-    print(f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}")
-    print(f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB")
-    print(f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%")
-    print(f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds")
-
-    if "std_execution_time" in results:
-        print(f"Execution Time Std: {results['std_execution_time']:.4f} seconds")
diff --git a/profiling/util/DummyEmbeddingEngine.py b/profiling/util/DummyEmbeddingEngine.py
deleted file mode 100644
index 0ba742182..000000000
--- a/profiling/util/DummyEmbeddingEngine.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import numpy as np
-from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
-
-
-class DummyEmbeddingEngine(EmbeddingEngine):
-    async def embed_text(self, text: list[str]) -> list[list[float]]:
-        return list(list(np.random.randn(3072)))
-
-    def get_vector_size(self) -> int:
-        return 3072
diff --git a/profiling/util/DummyLLMAdapter.py b/profiling/util/DummyLLMAdapter.py
deleted file mode 100644
index b28261665..000000000
--- a/profiling/util/DummyLLMAdapter.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from typing import Type
-from uuid import uuid4
-
-import spacy
-import textacy
-from pydantic import BaseModel
-
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.shared.data_models import Edge, KnowledgeGraph, Node, SummarizedContent
-
-
-class DummyLLMAdapter(LLMInterface):
-    nlp = spacy.load("en_core_web_sm")
-
-    async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
-    ) -> BaseModel:
-        if str(response_model) == "<class 'cognee.shared.data_models.SummarizedContent'>":
-            return dummy_summarize_content(text_input)
-        elif str(response_model) == "<class 'cognee.shared.data_models.KnowledgeGraph'>":
-            return dummy_extract_knowledge_graph(text_input, self.nlp)
-        else:
-            raise Exception(
-                "Currently dummy acreate_structured_input is only implemented for SummarizedContent and KnowledgeGraph"
-            )
-
-
-def dummy_extract_knowledge_graph(text, nlp):
-    doc = nlp(text)
-    triples = list(textacy.extract.subject_verb_object_triples(doc))
-
-    nodes = {}
-    edges = []
-    for triple in triples:
-        source = "_".join([str(e) for e in triple.subject])
-        target = "_".join([str(e) for e in triple.object])
-        nodes[source] = nodes.get(
-            source, Node(id=str(uuid4()), name=source, type="object", description="")
-        )
-        nodes[target] = nodes.get(
-            target, Node(id=str(uuid4()), name=target, type="object", description="")
-        )
-        edge_type = "_".join([str(e) for e in triple.verb])
-        edges.append(
-            Edge(
-                source_node_id=nodes[source].id,
-                target_node_id=nodes[target].id,
-                relationship_name=edge_type,
-            )
-        )
-    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)
-
-
-def dummy_summarize_content(text):
-    words = [(word, len(word)) for word in set(text.split(" "))]
-    words = sorted(words, key=lambda x: x[1], reverse=True)
-    summary = " ".join([word for word, _ in words[:50]])
-    description = " ".join([word for word, _ in words[:10]])
-    return SummarizedContent(summary=summary, description=description)
diff --git a/bin/dockerize b/tools/bin/dockerize
similarity index 100%
rename from bin/dockerize
rename to tools/bin/dockerize