chore: Move files (#848)
<!-- .github/pull_request_template.md -->

## Description

<!-- Provide a clear description of the changes in this PR -->

Moves files to tidy the repository layout: the translated README links now point at `assets/community/`, the multimedia example data moves from `.data/multimedia/` to `examples/data/multimedia/` (the example script and notebook are updated accordingly), and a leftover LeetCode practice solution plus the ad-hoc benchmark and dummy test helpers are removed.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Igor Ilic <igorilic03@gmail.com>
Deleted file (leftover LeetCode practice solution):
@@ -1,28 +0,0 @@
'''
Given a string, find the length of the longest substring without repeating characters.

Examples:

Given "abcabcbb", the answer is "abc", with a length of 3.

Given "bbbbb", the answer is "b", with a length of 1.

Given "pwwkew", the answer is "wke", with a length of 3. Note that the answer must be a substring; "pwke" is a subsequence, not a substring.
'''


class Solution(object):
    def lengthOfLongestSubstring(self, s):
        """
        :type s: str
        :rtype: int
        """
        # Sliding window: mapSet maps each character to the index just past its
        # most recent occurrence; start is the left edge of the current window.
        mapSet = {}
        start, result = 0, 0

        for end in range(len(s)):
            if s[end] in mapSet:
                # Repeated character inside the window: move the left edge past it.
                start = max(mapSet[s[end]], start)
            result = max(result, end - start + 1)
            mapSet[s[end]] = end + 1

        return result
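For a quick sanity check, a minimal sketch of how the deleted solution could be exercised; the expected values are taken from the docstring examples above:

```python
# Minimal usage sketch; expected values come from the docstring examples.
if __name__ == "__main__":
    solver = Solution()
    assert solver.lengthOfLongestSubstring("abcabcbb") == 3
    assert solver.lengthOfLongestSubstring("bbbbb") == 1
    assert solver.lengthOfLongestSubstring("pwwkew") == 3
    print("all docstring examples pass")
```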
@@ -35,11 +35,11 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
 <p align="center">
   🌐 Available Languages
   :
-  <a href="community/README.pt.md">🇵🇹 Português</a>
+  <a href="assets/community/README.pt.md">🇵🇹 Português</a>
   ·
-  <a href="community/README.zh.md">🇨🇳 [中文]</a>
+  <a href="assets/community/README.zh.md">🇨🇳 [中文]</a>
   ·
-  <a href="community/README.ru.md">🇷🇺 Русский</a>
+  <a href="assets/community/README.ru.md">🇷🇺 Русский</a>
 </p>
[Five binary image assets moved without modification; sizes unchanged: 262 KiB, 181 KiB, 603 KiB, 890 KiB, 10 KiB]
@@ -21,11 +21,11 @@ async def main():
     # and description of these files
     mp3_file_path = os.path.join(
         pathlib.Path(__file__).parent.parent.parent,
-        ".data/multimedia/text_to_speech.mp3",
+        "examples/data/multimedia/text_to_speech.mp3",
     )
     png_file_path = os.path.join(
         pathlib.Path(__file__).parent.parent.parent,
-        ".data/multimedia/example.png",
+        "examples/data/multimedia/example.png",
     )

     # Add the files, and make it available for cognify
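For context, a minimal sketch of how the updated paths resolve; the repository location below is a hypothetical placeholder standing in for `pathlib.Path(__file__).parent.parent.parent` in the example script:

```python
import os
import pathlib

# Hypothetical repository checkout location; in the real script this is
# pathlib.Path(__file__).parent.parent.parent.
repo_root = pathlib.Path("/path/to/cognee")

# After this PR the multimedia fixtures live under examples/data/ instead of .data/.
mp3_file_path = os.path.join(repo_root, "examples/data/multimedia/text_to_speech.mp3")
png_file_path = os.path.join(repo_root, "examples/data/multimedia/example.png")

print(mp3_file_path)  # /path/to/cognee/examples/data/multimedia/text_to_speech.mp3
print(png_file_path)  # /path/to/cognee/examples/data/multimedia/example.png
```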
@@ -21,10 +21,10 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 23,
    "metadata": {},
+   "cell_type": "code",
    "outputs": [],
+   "execution_count": null,
    "source": [
     "import os\n",
     "import pathlib\n",
@@ -34,12 +34,12 @@
    "mp3_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
-   "    \".data/multimedia/text_to_speech.mp3\",\n",
+   "    \"examples/data/multimedia/text_to_speech.mp3\",\n",
    ")\n",
    "png_file_path = os.path.join(\n",
    "    os.path.abspath(\"\"),\n",
    "    \"../\",\n",
-   "    \".data/multimedia/example.png\",\n",
+   "    \"examples/data/multimedia/example.png\",\n",
    ")"
    ]
   },
Deleted file (function benchmarking helper):
@@ -1,62 +0,0 @@
import statistics
import time
import tracemalloc
from typing import Any, Callable, Dict

import psutil


def benchmark_function(func: Callable, *args, num_runs: int = 5) -> Dict[str, Any]:
    """
    Benchmark a function for memory usage and computational performance.

    Args:
        func: Function to benchmark
        *args: Arguments to pass to the function
        num_runs: Number of times to run the benchmark

    Returns:
        Dictionary containing benchmark metrics
    """
    execution_times = []
    peak_memory_usages = []
    cpu_percentages = []

    process = psutil.Process()

    for _ in range(num_runs):
        # Start memory tracking
        tracemalloc.start()

        # Measure execution time and CPU usage
        start_time = time.perf_counter()
        start_cpu_time = process.cpu_times()

        func(*args)  # execute the function under test between the measurements

        end_cpu_time = process.cpu_times()
        end_time = time.perf_counter()

        # Calculate metrics
        execution_time = end_time - start_time
        cpu_time = (end_cpu_time.user + end_cpu_time.system) - (
            start_cpu_time.user + start_cpu_time.system
        )
        current, peak = tracemalloc.get_traced_memory()

        # Store results
        execution_times.append(execution_time)
        peak_memory_usages.append(peak / 1024 / 1024)  # Convert to MB
        cpu_percentages.append((cpu_time / execution_time) * 100)

        tracemalloc.stop()

    analysis = {
        "mean_execution_time": statistics.mean(execution_times),
        "mean_peak_memory_mb": statistics.mean(peak_memory_usages),
        "mean_cpu_percent": statistics.mean(cpu_percentages),
        "num_runs": num_runs,
    }

    if num_runs > 1:
        analysis["std_execution_time"] = statistics.stdev(execution_times)

    return analysis
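For reference, a minimal sketch of how `benchmark_function` could be driven; the workload below is a made-up placeholder, not something from this repository:

```python
import random

# Hypothetical workload: sort a list of random floats.
def sort_random_numbers(n):
    return sorted(random.random() for _ in range(n))

results = benchmark_function(sort_random_numbers, 100_000, num_runs=3)
print(
    f"mean time: {results['mean_execution_time']:.4f}s, "
    f"peak memory: {results['mean_peak_memory_mb']:.2f} MB, "
    f"cpu: {results['mean_cpu_percent']:.1f}%"
)
```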
Deleted file (benchmark driver script for get_graph_from_model):
@@ -1,63 +0,0 @@
import argparse
import asyncio

from .benchmark_function import benchmark_function

from cognee.modules.graph.utils import get_graph_from_model
from cognee.tests.unit.interfaces.graph.util import (
    PERSON_NAMES,
    create_organization_recursive,
)

# Example usage:
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Benchmark graph model with configurable recursive depth"
    )
    parser.add_argument(
        "--recursive-depth",
        type=int,
        default=3,
        help="Recursive depth for graph generation (default: 3)",
    )
    parser.add_argument("--runs", type=int, default=5, help="Number of benchmark runs (default: 5)")
    args = parser.parse_args()

    # Build a synthetic organization graph of the requested depth.
    society = create_organization_recursive(
        "society", "Society", PERSON_NAMES, args.recursive_depth
    )
    added_nodes = {}
    added_edges = {}
    visited_properties = {}
    nodes, edges = asyncio.run(
        get_graph_from_model(
            society,
            added_nodes=added_nodes,
            added_edges=added_edges,
            visited_properties=visited_properties,
        )
    )

    def get_graph_from_model_sync(model):
        # Fresh bookkeeping dicts for every benchmarked run.
        added_nodes = {}
        added_edges = {}
        visited_properties = {}

        return asyncio.run(
            get_graph_from_model(
                model,
                added_nodes=added_nodes,
                added_edges=added_edges,
                visited_properties=visited_properties,
            )
        )

    results = benchmark_function(get_graph_from_model_sync, society, num_runs=args.runs)
    print("\nBenchmark Results:")
    print(f"N nodes: {len(nodes)}, N edges: {len(edges)}, Recursion depth: {args.recursive_depth}")
    print(f"Mean Peak Memory: {results['mean_peak_memory_mb']:.2f} MB")
    print(f"Mean CPU Usage: {results['mean_cpu_percent']:.2f}%")
    print(f"Mean Execution Time: {results['mean_execution_time']:.4f} seconds")

    if "std_execution_time" in results:
        print(f"Execution Time Std: {results['std_execution_time']:.4f} seconds")
Deleted file (dummy embedding engine returning random vectors):
@@ -1,10 +0,0 @@
import numpy as np

from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine


class DummyEmbeddingEngine(EmbeddingEngine):
    async def embed_text(self, text: list[str]) -> list[list[float]]:
        # Return one random 3072-dimensional vector per input string.
        return [list(np.random.randn(3072)) for _ in text]

    def get_vector_size(self) -> int:
        return 3072
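A minimal usage sketch, assuming `DummyEmbeddingEngine` can be instantiated without constructor arguments (not verified against the current `EmbeddingEngine` base class):

```python
import asyncio

# Quick shape check of the dummy engine's output.
engine = DummyEmbeddingEngine()
vectors = asyncio.run(engine.embed_text(["hello", "world"]))
print(len(vectors), len(vectors[0]))  # expected: 2 3072
```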
Deleted file (dummy LLM adapter backed by spaCy/textacy):
@@ -1,59 +0,0 @@
from typing import Type
from uuid import uuid4

import spacy
import textacy
from pydantic import BaseModel

from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.shared.data_models import Edge, KnowledgeGraph, Node, SummarizedContent


class DummyLLMAdapter(LLMInterface):
    nlp = spacy.load("en_core_web_sm")

    async def acreate_structured_output(
        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
    ) -> BaseModel:
        if response_model is SummarizedContent:
            return dummy_summarize_content(text_input)
        elif response_model is KnowledgeGraph:
            return dummy_extract_knowledge_graph(text_input, self.nlp)
        else:
            raise Exception(
                "Currently the dummy acreate_structured_output is only implemented for SummarizedContent and KnowledgeGraph"
            )


def dummy_extract_knowledge_graph(text, nlp):
    # Use textacy's subject-verb-object extraction to build a toy knowledge graph.
    doc = nlp(text)
    triples = list(textacy.extract.subject_verb_object_triples(doc))

    nodes = {}
    edges = []
    for triple in triples:
        source = "_".join([str(e) for e in triple.subject])
        target = "_".join([str(e) for e in triple.object])
        nodes[source] = nodes.get(
            source, Node(id=str(uuid4()), name=source, type="object", description="")
        )
        nodes[target] = nodes.get(
            target, Node(id=str(uuid4()), name=target, type="object", description="")
        )
        edge_type = "_".join([str(e) for e in triple.verb])
        edges.append(
            Edge(
                source_node_id=nodes[source].id,
                target_node_id=nodes[target].id,
                relationship_name=edge_type,
            )
        )
    return KnowledgeGraph(nodes=list(nodes.values()), edges=edges)


def dummy_summarize_content(text):
    # Crude "summary": the longest unique words in the text.
    words = [(word, len(word)) for word in set(text.split(" "))]
    words = sorted(words, key=lambda x: x[1], reverse=True)
    summary = " ".join([word for word, _ in words[:50]])
    description = " ".join([word for word, _ in words[:10]])
    return SummarizedContent(summary=summary, description=description)
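A minimal sketch of what these dummy helpers produce, assuming spaCy's `en_core_web_sm` model is installed; the input sentences are made up for illustration:

```python
import spacy

nlp = spacy.load("en_core_web_sm")

# Knowledge-graph extraction: a single subject-verb-object sentence should
# yield two nodes and one edge.
graph = dummy_extract_knowledge_graph("Alice founded a robotics company.", nlp)
print([edge.relationship_name for edge in graph.edges])

# "Summarization" without an LLM: just the longest unique words.
summary = dummy_summarize_content("cognee builds knowledge graphs from unstructured documents")
print(summary.summary)
```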