From 148eb4ed9bf44ceb56350bd890b8935aba16f949 Mon Sep 17 00:00:00 2001
From: Leon Luithlen <leon@topoteretes.com>
Date: Fri, 15 Nov 2024 16:42:14 +0100
Subject: [PATCH] Add profile_graph_pydantic_conversion.py

---
 .../benchmark_function.py                     | 64 +++++++++++++++++++
 .../profile_graph_pydantic_conversion.py      | 37 +++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 profiling/graph_pydantic_conversion/benchmark_function.py
 create mode 100644 profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py

diff --git a/profiling/graph_pydantic_conversion/benchmark_function.py b/profiling/graph_pydantic_conversion/benchmark_function.py
new file mode 100644
index 000000000..95c483584
--- /dev/null
+++ b/profiling/graph_pydantic_conversion/benchmark_function.py
@@ -0,0 +1,64 @@
+import time
+import psutil
+import tracemalloc
+import statistics
+from typing import Callable, Any, Dict
+
+
+def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
+    """
+    Benchmark a function for memory usage and computational performance.
+
+    Runs are traced with ``tracemalloc``, so timings include tracing overhead.
+
+    Args:
+        func: Function to benchmark
+        *args: Positional arguments to pass to the function
+        num_runs: Number of times to run the benchmark (must be >= 1)
+
+    Returns:
+        Dictionary with "mean_execution_time" (seconds), "mean_peak_memory_mb",
+        "mean_cpu_percent", "num_runs" and, if num_runs > 1, "std_execution_time"
+
+    Raises:
+        ValueError: If num_runs is less than 1
+    """
+    if num_runs < 1:
+        raise ValueError(f"num_runs must be at least 1, got {num_runs}")
+
+    execution_times = []
+    peak_memory_usages = []
+    cpu_percentages = []
+    process = psutil.Process()
+
+    for _ in range(num_runs):
+        tracemalloc.start()
+        try:
+            start_time = time.perf_counter()
+            start_cpu_time = process.cpu_times()
+            result = func(*args)
+            end_cpu_time = process.cpu_times()
+            end_time = time.perf_counter()
+            _, peak = tracemalloc.get_traced_memory()
+            del result  # released only now, so freeing it is never part of a timed region
+        finally:
+            # Stop tracing even when func raises; otherwise it would stay enabled.
+            tracemalloc.stop()
+
+        execution_time = end_time - start_time
+        cpu_time = (end_cpu_time.user + end_cpu_time.system) - (start_cpu_time.user + start_cpu_time.system)
+        execution_times.append(execution_time)
+        peak_memory_usages.append(peak / 1024 / 1024)  # Convert to MB
+        # A zero-length interval has no meaningful CPU share; record 0 instead of dividing by 0.
+        cpu_percentages.append(cpu_time / execution_time * 100 if execution_time > 0 else 0.0)
+
+    analysis = {
+        "mean_execution_time": statistics.mean(execution_times),
+        "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
+        "mean_cpu_percent": statistics.mean(cpu_percentages),
+        "num_runs": num_runs,
+    }
+    if num_runs > 1:
+        analysis["std_execution_time"] = statistics.stdev(execution_times)
+
+    return analysis
\ No newline at end of file
diff --git a/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
new file mode 100644
index 000000000..75803b996
--- /dev/null
+++ b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
@@ -0,0 +1,37 @@
+import time
+import argparse
+
+from benchmark_function import benchmark_function
+from cognee.modules.graph.utils import get_graph_from_model
+
+from cognee.tests.unit.interfaces.graph.util import (
+    PERSON_NAMES,
+    create_organization_recursive,
+)
+
+
+
+# Script entry point: build a synthetic organization model and benchmark its graph conversion.
+if __name__ == "__main__":
+    cli = argparse.ArgumentParser(description="Benchmark graph model with configurable recursive depth")
+    cli.add_argument("--recursive-depth", type=int, default=3, help="Recursive depth for graph generation (default: 3)")
+    cli.add_argument("--runs", type=int, default=5, help="Number of benchmark runs (default: 5)")
+    options = cli.parse_args()
+
+    # Build the model once; the same object is handed to every benchmark run.
+    society = create_organization_recursive("society", "Society", PERSON_NAMES, options.recursive_depth)
+    # One conversion outside the benchmark, used only to report the graph size below.
+    nodes, edges = get_graph_from_model(society)
+    stats = benchmark_function(get_graph_from_model, society, num_runs=options.runs)
+
+    report = [
+        "\nBenchmark Results:",
+        f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {options.recursive_depth}",
+        f"Mean Peak Memory: {stats['mean_peak_memory_mb']:.2f} MB",
+        f"Mean CPU Usage: {stats['mean_cpu_percent']:.2f}%",
+        f"Mean Execution Time: {stats['mean_execution_time']:.4f} seconds",
+    ]
+    if "std_execution_time" in stats:
+        report.append(f"Execution Time Std: {stats['std_execution_time']:.4f} seconds")
+    for line in report:
+        print(line)