From 148eb4ed9bf44ceb56350bd890b8935aba16f949 Mon Sep 17 00:00:00 2001
From: Leon Luithlen
Date: Fri, 15 Nov 2024 16:42:14 +0100
Subject: [PATCH] Add profile_graph_pydantic_conversion.py

---
 .../benchmark_function.py                     | 77 +++++++++++++++++++
 .../profile_graph_pydantic_conversion.py      | 39 ++++++++++
 2 files changed, 116 insertions(+)
 create mode 100644 profiling/graph_pydantic_conversion/benchmark_function.py
 create mode 100644 profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py

diff --git a/profiling/graph_pydantic_conversion/benchmark_function.py b/profiling/graph_pydantic_conversion/benchmark_function.py
new file mode 100644
index 000000000..95c483584
--- /dev/null
+++ b/profiling/graph_pydantic_conversion/benchmark_function.py
@@ -0,0 +1,77 @@
+import time
+import psutil
+import tracemalloc
+import statistics
+from typing import Callable, Any, Dict
+
+
+def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
+    """
+    Benchmark a function for memory usage and computational performance.
+
+    Args:
+        func: Function to benchmark
+        *args: Arguments to pass to the function
+        num_runs: Number of times to run the benchmark (must be >= 1)
+
+    Returns:
+        Dictionary containing benchmark metrics: mean wall-clock time in
+        seconds, mean tracemalloc peak in MB, mean CPU usage in percent,
+        the number of runs and, for more than one run, the standard
+        deviation of the wall-clock time
+
+    Raises:
+        ValueError: If num_runs is smaller than 1
+    """
+    if num_runs < 1:
+        raise ValueError("num_runs must be at least 1")
+
+    execution_times = []
+    peak_memory_usages = []
+    cpu_percentages = []
+
+    process = psutil.Process()
+
+    for _ in range(num_runs):
+        # Start memory tracking; restarted per run so each peak is per-run
+        tracemalloc.start()
+        try:
+            # Measure execution time and CPU usage
+            start_time = time.perf_counter()
+            start_cpu_time = process.cpu_times()
+
+            func(*args)
+
+            end_cpu_time = process.cpu_times()
+            end_time = time.perf_counter()
+            _, peak = tracemalloc.get_traced_memory()
+        finally:
+            # Stop tracing even when func raises, so tracing overhead does
+            # not stay enabled for the rest of the process
+            tracemalloc.stop()
+
+        # Calculate metrics
+        execution_time = end_time - start_time
+        cpu_time = (end_cpu_time.user + end_cpu_time.system) - (
+            start_cpu_time.user + start_cpu_time.system
+        )
+
+        # Store results
+        execution_times.append(execution_time)
+        peak_memory_usages.append(peak / 1024 / 1024)  # Convert to MB
+        # A zero-length interval would divide by zero; report 0% instead
+        cpu_percentages.append(
+            (cpu_time / execution_time) * 100 if execution_time > 0 else 0.0
+        )
+
+    analysis = {
+        "mean_execution_time": statistics.mean(execution_times),
+        "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
+        "mean_cpu_percent": statistics.mean(cpu_percentages),
+        "num_runs": num_runs,
+    }
+
+    if num_runs > 1:
+        analysis["std_execution_time"] = statistics.stdev(execution_times)
+
+    return analysis
diff --git a/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
new file mode 100644
index 000000000..75803b996
--- /dev/null
+++ b/profiling/graph_pydantic_conversion/profile_graph_pydantic_conversion.py
@@ -0,0 +1,39 @@
+import time
+import argparse
+
+from benchmark_function import benchmark_function
+from cognee.modules.graph.utils import get_graph_from_model
+
+from cognee.tests.unit.interfaces.graph.util import (
+    PERSON_NAMES,
+    create_organization_recursive,
+)
+
+
+# Example usage:
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Benchmark graph model with configurable recursive depth')
+    parser.add_argument('--recursive-depth', type=int, default=3,
+                        help='Recursive depth for graph generation (default: 3)')
+    parser.add_argument('--runs', type=int, default=5,
+                        help='Number of benchmark runs (default: 5)')
+    args = parser.parse_args()
+    if args.runs < 1:
+        # benchmark_function cannot average over zero runs
+        parser.error("--runs must be at least 1")
+
+    society = create_organization_recursive(
+        "society", "Society", PERSON_NAMES, args.recursive_depth
+    )
+    # Converted once outside the benchmark only to report the graph size
+    nodes, edges = get_graph_from_model(society)
+
+    results = benchmark_function(get_graph_from_model, society, num_runs=args.runs)
+    print("\nBenchmark Results:")
+    print(f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}")
+    print(f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB")
+    print(f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%")
+    print(f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds")
+
+    if 'std_execution_time' in results:
+        print(f"Execution Time Std: {results['std_execution_time']:.4f} seconds")