chore: Move files (#848)
<!-- .github/pull_request_template.md -->

## Description

<!-- Provide a clear description of the changes in this PR -->

Moves files to tidy the repository layout: the translated README links now point at `assets/community/`, the multimedia example data moves from `.data/multimedia/` to `examples/data/multimedia/` (the example script and notebook are updated accordingly), and a leftover LeetCode practice solution plus the ad-hoc benchmark and dummy test helpers are removed.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <igorilic03@gmail.com>
Deleted file (leftover LeetCode practice solution):
@@ -1,28 +0,0 @@
'''
Given a string, find the length of the longest substring without repeating characters.

Examples:

Given "abcabcbb", the answer is "abc", with a length of 3.

Given "bbbbb", the answer is "b", with a length of 1.

Given "pwwkew", the answer is "wke", with a length of 3. Note that the answer must be a substring; "pwke" is a subsequence, not a substring.
'''


class Solution(object):
    def lengthOfLongestSubstring(self, s):
        """
        :type s: str
        :rtype: int
        """
        # Sliding window: mapSet maps each character to the index just past its
        # most recent occurrence; start is the left edge of the current window.
        mapSet = {}
        start, result = 0, 0

        for end in range(len(s)):
            if s[end] in mapSet:
                # Repeated character inside the window: move the left edge past it.
                start = max(mapSet[s[end]], start)
            result = max(result, end - start + 1)
            mapSet[s[end]] = end + 1

        return result
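For a quick sanity check, a minimal sketch of how the deleted solution could be exercised; the expected values are taken from the docstring examples above:

```python
# Minimal usage sketch; expected values come from the docstring examples.
if __name__ == "__main__":
    solver = Solution()
    assert solver.lengthOfLongestSubstring("abcabcbb") == 3
    assert solver.lengthOfLongestSubstring("bbbbb") == 1
    assert solver.lengthOfLongestSubstring("pwwkew") == 3
    print("all docstring examples pass")
```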
@@ -35,11 +35,11 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
 <p align="center">
   🌐 Available Languages
   :
-  <a href="community/README.pt.md">🇵🇹 Português</a>
+  <a href="assets/community/README.pt.md">🇵🇹 Português</a>
   ·
-  <a href="community/README.zh.md">🇨🇳 [中文]</a>
+  <a href="assets/community/README.zh.md">🇨🇳 [中文]</a>
   ·
-  <a href="community/README.ru.md">🇷🇺 Русский</a>
+  <a href="assets/community/README.ru.md">🇷🇺 Русский</a>
 </p>
[Five binary image assets moved without modification; sizes unchanged: 262 KiB, 181 KiB, 603 KiB, 890 KiB, 10 KiB]
@@ -21,11 +21,11 @@ async def main():
     # and description of these files
     mp3_file_path = os.path.join(
         pathlib.Path(__file__).parent.parent.parent,
-        ".data/multimedia/text_to_speech.mp3",
+        "examples/data/multimedia/text_to_speech.mp3",
     )
     png_file_path = os.path.join(
         pathlib.Path(__file__).parent.parent.parent,
-        ".data/multimedia/example.png",
+        "examples/data/multimedia/example.png",
     )

     # Add the files, and make it available for cognify
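For context, a minimal sketch of how the updated paths resolve; the repository location below is a hypothetical placeholder standing in for `pathlib.Path(__file__).parent.parent.parent` in the example script:

```python
import os
import pathlib

# Hypothetical repository checkout location; in the real script this is
# pathlib.Path(__file__).parent.parent.parent.
repo_root = pathlib.Path("/path/to/cognee")

# After this PR the multimedia fixtures live under examples/data/ instead of .data/.
mp3_file_path = os.path.join(repo_root, "examples/data/multimedia/text_to_speech.mp3")
png_file_path = os.path.join(repo_root, "examples/data/multimedia/example.png")

print(mp3_file_path)  # /path/to/cognee/examples/data/multimedia/text_to_speech.mp3
print(png_file_path)  # /path/to/cognee/examples/data/multimedia/example.png
```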
@@ -21,10 +21,10 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 23,
    "metadata": {},
+   "cell_type": "code",
    "outputs": [],
+   "execution_count": null,
    "source": [
     "import os\n",
     "import pathlib\n",
@@ -34,12 +34,12 @@
    "mp3_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
-   "    \".data/multimedia/text_to_speech.mp3\",\n",
+   "    \"examples/data/multimedia/text_to_speech.mp3\",\n",
    ")\n",
    "png_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
-   "    \".data/multimedia/example.png\",\n",
+   "    \"examples/data/multimedia/example.png\",\n",
    ")"
    ]
   },
Deleted file (function benchmarking helper):
@@ -1,62 +0,0 @@
import statistics
import time
import tracemalloc
from typing import Any, Callable, Dict

import psutil


def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
    """
    Benchmark a function for memory usage and computational performance.

    Args:
        func: Function to benchmark
        *args: Arguments to pass to the function
        num_runs: Number of times to run the benchmark

    Returns:
        Dictionary containing benchmark metrics
    """
    execution_times = []
    peak_memory_usages = []
    cpu_percentages = []

    process = psutil.Process()

    for _ in range(num_runs):
        # Start memory tracking
        tracemalloc.start()

        # Measure execution time and CPU usage
        start_time = time.perf_counter()
        start_cpu_time = process.cpu_times()

        func(*args)  # execute the function under test between the measurements

        end_cpu_time = process.cpu_times()
        end_time = time.perf_counter()

        # Calculate metrics
        execution_time = end_time - start_time
        cpu_time = (end_cpu_time.user + end_cpu_time.system) - (
            start_cpu_time.user + start_cpu_time.system
        )
        current, peak = tracemalloc.get_traced_memory()

        # Store results
        execution_times.append(execution_time)
        peak_memory_usages.append(peak / 1024 / 1024)  # Convert to MB
        cpu_percentages.append((cpu_time / execution_time) * 100)

        tracemalloc.stop()

    analysis = {
        "mean_execution_time": statistics.mean(execution_times),
        "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
        "mean_cpu_percent": statistics.mean(cpu_percentages),
        "num_runs": num_runs,
    }

    if num_runs > 1:
        analysis["std_execution_time"] = statistics.stdev(execution_times)

    return analysis
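For reference, a minimal sketch of how `benchmark_function` could be driven; the workload below is a made-up placeholder, not something from this repository:

```python
import random

# Hypothetical workload: sort a list of random floats.
def sort_random_numbers(n):
    return sorted(random.random() for _ in range(n))

results = benchmark_function(sort_random_numbers, 100_000, num_runs=3)
print(
    f"mean time: {results['mean_execution_time']:.4f}s, "
    f"peak memory: {results['mean_peak_memory_mb']:.2f} MB, "
    f"cpu: {results['mean_cpu_percent']:.1f}%"
)
```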
Deleted file (benchmark driver script for get_graph_from_model):
@@ -1,63 +0,0 @@
import argparse
import asyncio

from .benchmark_function import benchmark_function

from cognee.modules.graph.utils import get_graph_from_model
from cognee.tests.unit.interfaces.graph.util import (
    PERSON_NAMES,
    create_organization_recursive,
)

# Example usage:
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Benchmark graph model with configurable recursive depth"
    )
    parser.add_argument(
        "--recursive-depth",
        type=int,
        default=3,
        help="Recursive depth for graph generation (default: 3)",
    )
    parser.add_argument("--runs", type=int, default=5, help="Number of benchmark runs (default: 5)")
    args = parser.parse_args()

    # Build a synthetic organization graph of the requested depth.
    society = create_organization_recursive(
        "society", "Society", PERSON_NAMES, args.recursive_depth
    )
    added_nodes = {}
    added_edges = {}
    visited_properties = {}
    nodes, edges = asyncio.run(
        get_graph_from_model(
            society,
            added_nodes=added_nodes,
            added_edges=added_edges,
            visited_properties=visited_properties,
        )
    )

    def get_graph_from_model_sync(model):
        # Fresh bookkeeping dicts for every benchmarked run.
        added_nodes = {}
        added_edges = {}
        visited_properties = {}

        return asyncio.run(
            get_graph_from_model(
                model,
                added_nodes=added_nodes,
                added_edges=added_edges,
                visited_properties=visited_properties,
            )
        )

    results = benchmark_function(get_graph_from_model_sync, society, num_runs=args.runs)
    print("\nBenchmark Results:")
    print(f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}")
    print(f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB")
    print(f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%")
    print(f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds")

    if "std_execution_time" in results:
        print(f"Execution Time Std: {results['std_execution_time']:.4f} seconds")
Deleted file (dummy embedding engine returning random vectors):
@@ -1,10 +0,0 @@
import numpy as np

from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine


class DummyEmbeddingEngine(EmbeddingEngine):
    async def embed_text(self, text: list[str]) -> list[list[float]]:
        # Return one random 3072-dimensional vector per input string.
        return [list(np.random.randn(3072)) for _ in text]

    def get_vector_size(self) -> int:
        return 3072
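A minimal usage sketch, assuming `DummyEmbeddingEngine` can be instantiated without constructor arguments (not verified against the current `EmbeddingEngine` base class):

```python
import asyncio

# Quick shape check of the dummy engine's output.
engine = DummyEmbeddingEngine()
vectors = asyncio.run(engine.embed_text(["hello", "world"]))
print(len(vectors), len(vectors[0]))  # expected: 2 3072
```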
Deleted file (dummy LLM adapter backed by spaCy/textacy):
@@ -1,59 +0,0 @@
from typing import Type
from uuid import uuid4

import spacy
import textacy
from pydantic import BaseModel

from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.shared.data_models import Edge, KnowledgeGraph, Node, SummarizedContent


class DummyLLMAdapter(LLMInterface):
    nlp = spacy.load("en_core_web_sm")

    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
    ) -> BaseModel:
        if response_model is SummarizedContent:
            return dummy_summarize_content(text_input)
        elif response_model is KnowledgeGraph:
            return dummy_extract_knowledge_graph(text_input, self.nlp)
        else:
            raise Exception(
                "Currently the dummy acreate_structured_output is only implemented for SummarizedContent and KnowledgeGraph"
            )


def dummy_extract_knowledge_graph(text, nlp):
    # Use textacy's subject-verb-object extraction to build a toy knowledge graph.
    doc = nlp(text)
    triples = list(textacy.extract.subject_verb_object_triples(doc))

    nodes = {}
    edges = []
    for triple in triples:
        source = "_".join([str(e) for e in triple.subject])
        target = "_".join([str(e) for e in triple.object])
        nodes[source] = nodes.get(
            source, Node(id=str(uuid4()), name=source, type="object", description="")
        )
        nodes[target] = nodes.get(
            target, Node(id=str(uuid4()), name=target, type="object", description="")
        )
        edge_type = "_".join([str(e) for e in triple.verb])
        edges.append(
            Edge(
                source_node_id=nodes[source].id,
                target_node_id=nodes[target].id,
                relationship_name=edge_type,
            )
        )
    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)


def dummy_summarize_content(text):
    # Crude "summary": the longest unique words in the text.
    words = [(word, len(word)) for word in set(text.split(" "))]
    words = sorted(words, key=lambda x: x[1], reverse=True)
    summary = " ".join([word for word, _ in words[:50]])
    description = " ".join([word for word, _ in words[:10]])
    return SummarizedContent(summary=summary, description=description)
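A minimal sketch of what these dummy helpers produce, assuming spaCy's `en_core_web_sm` model is installed; the input sentences are made up for illustration:

```python
import spacy

nlp = spacy.load("en_core_web_sm")

# Knowledge-graph extraction: a single subject-verb-object sentence should
# yield two nodes and one edge.
graph = dummy_extract_knowledge_graph("Alice founded a robotics company.", nlp)
print([edge.relationship_name for edge in graph.edges])

# "Summarization" without an LLM: just the longest unique words.
summary = dummy_summarize_content("cognee builds knowledge graphs from unstructured documents")
print(summary.summary)
```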