diff --git a/.data/code/example.txt b/.data/code/example.txt deleted file mode 100644 index 4596a08eb..000000000 --- a/.data/code/example.txt +++ /dev/null @@ -1,28 +0,0 @@ -''' - Given a string, find the length of the longest substring without repeating characters. - - Examples: - - Given "abcabcbb", the answer is "abc", which the length is 3. - - Given "bbbbb", the answer is "b", with the length of 1. - - Given "pwwkew", the answer is "wke", with the length of 3. Note that the answer must be a substring, "pwke" is a subsequence and not a substring. -''' - -class Solution(object): - def lengthOfLongestSubstring(self, s): - """ - :type s: str - :rtype: int - """ - mapSet = {} - start, result = 0, 0 - - for end in range(len(s)): - if s[end] in mapSet: - start = max(mapSet[s[end]], start) - result = max(result, end-start+1) - mapSet[s[end]] = end+1 - - return result diff --git a/.data/short_stories/soldiers-home.pdf b/.data/short_stories/soldiers-home.pdf deleted file mode 100644 index e453ca4bc..000000000 Binary files a/.data/short_stories/soldiers-home.pdf and /dev/null differ diff --git a/README.md b/README.md index ad46fe716..e78735f58 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,11 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
🌐 Available Languages : - 🇵🇹 Português + 🇵🇹 Português · - 🇨🇳 [中文] + 🇨🇳 [中文] · - 🇷🇺 Русский + 🇷🇺 Русский
diff --git a/community/README.pt.md b/assets/community/README.pt.md similarity index 100% rename from community/README.pt.md rename to assets/community/README.pt.md diff --git a/community/README.ru.md b/assets/community/README.ru.md similarity index 100% rename from community/README.ru.md rename to assets/community/README.ru.md diff --git a/community/README.zh.md b/assets/community/README.zh.md similarity index 100% rename from community/README.zh.md rename to assets/community/README.zh.md diff --git a/community/cognee_benefits_zh.JPG b/assets/community/cognee_benefits_zh.JPG similarity index 100% rename from community/cognee_benefits_zh.JPG rename to assets/community/cognee_benefits_zh.JPG diff --git a/community/cognee_diagram_zh.JPG b/assets/community/cognee_diagram_zh.JPG similarity index 100% rename from community/cognee_diagram_zh.JPG rename to assets/community/cognee_diagram_zh.JPG diff --git a/community/graph_visualization_pt.png b/assets/community/graph_visualization_pt.png similarity index 100% rename from community/graph_visualization_pt.png rename to assets/community/graph_visualization_pt.png diff --git a/community/graph_visualization_ru.png b/assets/community/graph_visualization_ru.png similarity index 100% rename from community/graph_visualization_ru.png rename to assets/community/graph_visualization_ru.png diff --git a/helm/Chart.yaml b/deployment/helm/Chart.yaml similarity index 100% rename from helm/Chart.yaml rename to deployment/helm/Chart.yaml diff --git a/helm/Dockerfile b/deployment/helm/Dockerfile similarity index 100% rename from helm/Dockerfile rename to deployment/helm/Dockerfile diff --git a/helm/README.md b/deployment/helm/README.md similarity index 100% rename from helm/README.md rename to deployment/helm/README.md diff --git a/helm/docker-compose-helm.yml b/deployment/helm/docker-compose-helm.yml similarity index 100% rename from helm/docker-compose-helm.yml rename to deployment/helm/docker-compose-helm.yml diff --git a/helm/templates/cognee_deployment.yaml b/deployment/helm/templates/cognee_deployment.yaml similarity index 100% rename from helm/templates/cognee_deployment.yaml rename to deployment/helm/templates/cognee_deployment.yaml diff --git a/helm/templates/cognee_service.yaml b/deployment/helm/templates/cognee_service.yaml similarity index 100% rename from helm/templates/cognee_service.yaml rename to deployment/helm/templates/cognee_service.yaml diff --git a/helm/templates/postgres_deployment.yaml b/deployment/helm/templates/postgres_deployment.yaml similarity index 100% rename from helm/templates/postgres_deployment.yaml rename to deployment/helm/templates/postgres_deployment.yaml diff --git a/helm/templates/postgres_pvc.yaml b/deployment/helm/templates/postgres_pvc.yaml similarity index 100% rename from helm/templates/postgres_pvc.yaml rename to deployment/helm/templates/postgres_pvc.yaml diff --git a/helm/templates/postgres_service.yaml b/deployment/helm/templates/postgres_service.yaml similarity index 100% rename from helm/templates/postgres_service.yaml rename to deployment/helm/templates/postgres_service.yaml diff --git a/helm/values.yaml b/deployment/helm/values.yaml similarity index 100% rename from helm/values.yaml rename to deployment/helm/values.yaml diff --git a/.data/multimedia/example.png b/examples/data/multimedia/example.png similarity index 100% rename from .data/multimedia/example.png rename to examples/data/multimedia/example.png diff --git a/.data/multimedia/text_to_speech.mp3 b/examples/data/multimedia/text_to_speech.mp3 similarity index 100% rename from .data/multimedia/text_to_speech.mp3 rename to examples/data/multimedia/text_to_speech.mp3 diff --git a/examples/python/multimedia_example.py b/examples/python/multimedia_example.py index 228ea0f04..c38b746ef 100644 --- a/examples/python/multimedia_example.py +++ b/examples/python/multimedia_example.py @@ -21,11 +21,11 @@ async def main(): # and description of these files mp3_file_path = os.path.join( pathlib.Path(__file__).parent.parent.parent, - ".data/multimedia/text_to_speech.mp3", + "examples/data/multimedia/text_to_speech.mp3", ) png_file_path = os.path.join( pathlib.Path(__file__).parent.parent.parent, - ".data/multimedia/example.png", + "examples/data/multimedia/example.png", ) # Add the files, and make it available for cognify diff --git a/notebooks/cognee_multimedia_demo.ipynb b/notebooks/cognee_multimedia_demo.ipynb index 9dde1aec1..d27ce069b 100644 --- a/notebooks/cognee_multimedia_demo.ipynb +++ b/notebooks/cognee_multimedia_demo.ipynb @@ -21,10 +21,10 @@ ] }, { - "cell_type": "code", - "execution_count": 23, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ "import os\n", "import pathlib\n", @@ -34,12 +34,12 @@ "mp3_file_path = os.path.join(\n", " os.path.abspath(\"\"),\n", " \"../\",\n", - " \".data/multimedia/text_to_speech.mp3\",\n", + " \"examples/data/multimedia/text_to_speech.mp3\",\n", ")\n", "png_file_path = os.path.join(\n", " os.path.abspath(\"\"),\n", " \"../\",\n", - " \".data/multimedia/example.png\",\n", + " \"examples/data/multimedia/example.png\",\n", ")" ] }, diff --git a/profiling/graph_pydantic_conversion/benchmark_function.py b/profiling/graph_pydantic_conversion/benchmark_function.py deleted file mode 100644 index a4f5c839b..000000000 --- a/profiling/graph_pydantic_conversion/benchmark_function.py +++ /dev/null @@ -1,62 +0,0 @@ -import statistics -import time -import tracemalloc -from typing import Any, Callable, Dict - -import psutil - - -def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]: - """ - Benchmark a function for memory usage and computational performance. - - Args: - func: Function to benchmark - *args: Arguments to pass to the function - num_runs: Number of times to run the benchmark - - Returns: - Dictionary containing benchmark metrics - """ - execution_times = [] - peak_memory_usages = [] - cpu_percentages = [] - - process = psutil.Process() - - for _ in range(num_runs): - # Start memory tracking - tracemalloc.start() - - # Measure execution time and CPU usage - start_time = time.perf_counter() - start_cpu_time = process.cpu_times() - - end_cpu_time = process.cpu_times() - end_time = time.perf_counter() - - # Calculate metrics - execution_time = end_time - start_time - cpu_time = (end_cpu_time.user + end_cpu_time.system) - ( - start_cpu_time.user + start_cpu_time.system - ) - current, peak = tracemalloc.get_traced_memory() - - # Store results - execution_times.append(execution_time) - peak_memory_usages.append(peak / 1024 / 1024) # Convert to MB - cpu_percentages.append((cpu_time / execution_time) * 100) - - tracemalloc.stop() - - analysis = { - "mean_execution_time": statistics.mean(execution_times), - "mean_peak_memory_mb": statistics.mean(peak_memory_usages), - "mean_cpu_percent": statistics.mean(cpu_percentages), - "num_runs": num_runs, - } - - if num_runs > 1: - analysis["std_execution_time"] = statistics.stdev(execution_times) - - return analysis diff --git a/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py deleted file mode 100644 index c1c0b6756..000000000 --- a/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py +++ /dev/null @@ -1,63 +0,0 @@ -import argparse -import asyncio - -from .benchmark_function import benchmark_function - -from cognee.modules.graph.utils import get_graph_from_model -from cognee.tests.unit.interfaces.graph.util import ( - PERSON_NAMES, - create_organization_recursive, -) - -# Example usage: -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Benchmark graph model with configurable recursive depth" - ) - parser.add_argument( - "--recursive-depth", - type=int, - default=3, - help="Recursive depth for graph generation (default: 3)", - ) - parser.add_argument("--runs", type=int, default=5, help="Number of benchmark runs (default: 5)") - args = parser.parse_args() - - society = create_organization_recursive( - "society", "Society", PERSON_NAMES, args.recursive_depth - ) - added_nodes = {} - added_edges = {} - visited_properties = {} - nodes, edges = asyncio.run( - get_graph_from_model( - society, - added_nodes=added_nodes, - added_edges=added_edges, - visited_properties=visited_properties, - ) - ) - - def get_graph_from_model_sync(model): - added_nodes = {} - added_edges = {} - visited_properties = {} - - return asyncio.run( - get_graph_from_model( - model, - added_nodes=added_nodes, - added_edges=added_edges, - visited_properties=visited_properties, - ) - ) - - results = benchmark_function(get_graph_from_model_sync, society, num_runs=args.runs) - print("\nBenchmark Results:") - print(f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}") - print(f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB") - print(f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%") - print(f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds") - - if "std_execution_time" in results: - print(f"Execution Time Std: {results['std_execution_time']:.4f} seconds") diff --git a/profiling/util/DummyEmbeddingEngine.py b/profiling/util/DummyEmbeddingEngine.py deleted file mode 100644 index 0ba742182..000000000 --- a/profiling/util/DummyEmbeddingEngine.py +++ /dev/null @@ -1,10 +0,0 @@ -import numpy as np -from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine - - -class DummyEmbeddingEngine(EmbeddingEngine): - async def embed_text(self, text: list[str]) -> list[list[float]]: - return list(list(np.random.randn(3072))) - - def get_vector_size(self) -> int: - return 3072 diff --git a/profiling/util/DummyLLMAdapter.py b/profiling/util/DummyLLMAdapter.py deleted file mode 100644 index b28261665..000000000 --- a/profiling/util/DummyLLMAdapter.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import Type -from uuid import uuid4 - -import spacy -import textacy -from pydantic import BaseModel - -from cognee.infrastructure.llm.llm_interface import LLMInterface -from cognee.shared.data_models import Edge, KnowledgeGraph, Node, SummarizedContent - - -class DummyLLMAdapter(LLMInterface): - nlp = spacy.load("en_core_web_sm") - - async def acreate_structured_output( - self, text_input: str, system_prompt: str, response_model: Type[BaseModel] - ) -> BaseModel: - if str(response_model) == "