Merge branch 'main' into feat/COG-544-eval-on-swe-bench

This commit is contained in:
Rita Aleksziev 2024-11-15 17:15:16 +01:00
commit 4d6229bf7e
18 changed files with 719 additions and 546 deletions

View file

@@ -62,3 +62,8 @@ class GraphDBInterface(Protocol):
async def delete_graph(
self,
): raise NotImplementedError
@abstractmethod
async def get_graph_data(
self
): raise NotImplementedError
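The new get_graph_data hook is what the CogneeGraph projection added later in this commit consumes. A minimal sketch of a conforming adapter, assuming the (node_id, properties) and (source_id, target_id, relationship_type, properties) tuple shapes that project_graph_from_db unpacks; the class and sample data are hypothetical:

# Hypothetical in-memory adapter, for illustration only.
class InMemoryGraphAdapter:
    def __init__(self):
        self.node_store = {"n1": {"name": "Alice"}, "n2": {"name": "Bob"}}
        self.edge_store = [("n1", "n2", "knows", {"weight": 1})]

    async def get_graph_data(self):
        # Returns (nodes, edges):
        #   nodes: iterable of (node_id, properties) pairs
        #   edges: iterable of (source_id, target_id, relationship_type, properties) tuples
        return list(self.node_store.items()), list(self.edge_store)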

View file

@@ -27,9 +27,6 @@ class Neo4jAdapter(GraphDBInterface):
max_connection_lifetime = 120
)
async def close(self) -> None:
await self.driver.close()
@asynccontextmanager
async def get_session(self) -> AsyncSession:
async with self.driver.session() as session:
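Because get_session is wrapped in @asynccontextmanager, call sites hold the underlying driver session only for the duration of the with-block. A minimal usage sketch; the count_nodes helper and Cypher query are illustrative, not part of this commit:

async def count_nodes(adapter: Neo4jAdapter) -> int:
    # The session is opened on entry and closed automatically on exit.
    async with adapter.get_session() as session:
        result = await session.run("MATCH (n) RETURN count(n) AS node_count")
        record = await result.single()
        return record["node_count"]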

View file

@@ -112,10 +112,18 @@ class LanceDBAdapter(VectorDBInterface):
for (data_point_index, data_point) in enumerate(data_points)
]
await collection.merge_insert("id") \
.when_matched_update_all() \
.when_not_matched_insert_all() \
.execute(lance_data_points)
# TODO: This keeps us compatible with the current pydantic version, but it shouldn't
# stay like this; existing rows should be updated in place rather than recreated
await collection.delete("id IS NOT NULL")
original_size = await collection.count_rows()
await collection.add(lance_data_points)
new_size = await collection.count_rows()
if new_size <= original_size:
raise ValueError(
"LanceDB create_datapoints error: data points did not get added.")
async def retrieve(self, collection_name: str, data_point_ids: list[str]):
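The deleted merge_insert chain was LanceDB's native upsert; the interim delete-then-add wipes every existing row before re-inserting. Once the pydantic issue noted in the TODO is resolved, the upsert could presumably be restored along these lines (a sketch, not part of this commit):

# Upsert keyed on "id": overwrite matching rows, insert the rest.
await (
    collection.merge_insert("id")
    .when_matched_update_all()      # rows whose id already exists get overwritten
    .when_not_matched_insert_all()  # rows with new ids get inserted
    .execute(lance_data_points)
)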

View file

@@ -17,6 +17,7 @@ class TextChunker():
self.get_text = get_text
def read(self):
self.paragraph_chunks = []
for content_text in self.get_text():
for chunk_data in chunk_by_paragraph(
content_text,

View file

@@ -1,84 +0,0 @@
import random
from datasets import load_dataset
from dspy.datasets.dataset import Dataset
class HotPotQA(Dataset):
def __init__(self, *args, only_hard_examples=True, keep_details='dev_titles', unofficial_dev=True, **kwargs) -> None:
super().__init__(*args, **kwargs)
assert only_hard_examples, "Care must be taken when adding support for easy examples." \
"Dev must be all hard to match official dev, but training can be flexible."
hf_official_train = load_dataset("hotpot_qa", 'fullwiki', split='train')
hf_official_dev = load_dataset("hotpot_qa", 'fullwiki', split='validation')
official_train = []
for raw_example in hf_official_train:
if raw_example['level'] == 'hard':
if keep_details is True:
keys = ['id', 'question', 'answer', 'type', 'supporting_facts', 'context']
elif keep_details == 'dev_titles':
keys = ['question', 'answer', 'supporting_facts']
else:
keys = ['question', 'answer']
example = {k: raw_example[k] for k in keys}
if 'supporting_facts' in example:
example['gold_titles'] = set(example['supporting_facts']['title'])
del example['supporting_facts']
official_train.append(example)
rng = random.Random(0)
rng.shuffle(official_train)
self._train = official_train[:len(official_train)*75//100]
if unofficial_dev:
self._dev = official_train[len(official_train)*75//100:]
else:
self._dev = None
for example in self._train:
if keep_details == 'dev_titles':
del example['gold_titles']
test = []
for raw_example in hf_official_dev:
assert raw_example['level'] == 'hard'
example = {k: raw_example[k] for k in ['id', 'question', 'answer', 'type', 'supporting_facts']}
if 'supporting_facts' in example:
example['gold_titles'] = set(example['supporting_facts']['title'])
del example['supporting_facts']
test.append(example)
self._test = test
if __name__ == '__main__':
from dsp.utils import dotdict
data_args = dotdict(train_seed=1, train_size=16, eval_seed=2023, dev_size=200*5, test_size=0)
dataset = HotPotQA(**data_args)
print(dataset)
print(dataset.train[0].question)
print(dataset.train[15].question)
print(len(dataset.train), len(dataset.dev), len(dataset.test))
print(dataset.dev[0].question)
print(dataset.dev[340].question)
print(dataset.dev[937].question)
"""
What was the population of the city where Woodward Avenue ends in 2010?
Where did the star , who is also an executive producer, of the Mick begin her carrer?
16 1000 0
Both London and German have seen attacks during war, there was one specific type of attack that Germany called the blitz, what did London call a similar attack?
Pre-Madonna was a collection of demos by the singer who was a leading presence during the emergence of what network?
Alan Mills composed the classic folk song that tells the story of what?
"""

View file

@@ -1,65 +0,0 @@
import dspy
from dspy.evaluate.evaluate import Evaluate
from dspy.primitives.example import Example
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.shared.data_models import Answer
from cognee.infrastructure.llm import get_llm_config
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.dataset import HotPotQA
def evaluate():
dataset = HotPotQA(
train_seed = 1,
train_size = 16,
eval_seed = 2023,
dev_size = 8,
test_size = 0,
keep_details = True,
)
# Evaluate
evaluate_examples = [
Example(
base = None,
question = None,
context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]),
answer = example.answer,
) for example in dataset.dev
]
devset = [example.with_inputs("context", "question") for example in evaluate_examples]
evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096)
llm_config = get_llm_config()
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
def evaluate_answer(example, graph_prediction, trace = None):
llm_client = get_llm_client()
try:
answer_prediction = llm_client.create_structured_output(
text_input = example.question,
system_prompt = f"""Answer the question by looking at the provided knowledge graph.
Use only the graph to answer the question and be very brief.
This is the knowledge graph:
{graph_prediction.graph.model_dump(mode = "json")}""",
response_model = Answer,
)
except:
return False
return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
dsp.passage_match([example.answer], [answer_prediction.answer])
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
dspy.settings.configure(lm = gpt4)
evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer)
if __name__ == "__main__":
evaluate()

View file

@@ -1,89 +0,0 @@
import dspy
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.llm import get_llm_config
def run():
llm_config = get_llm_config()
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
text = """The 1985 FA Charity Shield (also known as the General Motors FA
Charity Shield for sponsorship reasons) was the 63rd FA Charity Shield,
an annual football match played between the winners of the previous
season's First Division and FA Cup competitions. The match was played on
10 August 1985 at Wembley Stadium and contested by Everton,
who had won the 1984\u201385 First Division, and Manchester United,
who had won the 1984\u201385 FA Cup. Everton won 2\u20130 with goals from
Trevor Steven and Adrian Heath. Trevor Steven put Everton into the lead
when he swept home from six yards after a cross from the left in the first half.
The second goal came in the second half when Manchester United goalkeeper
Gary Bailey dropped a cross from the left to allow Adrian Heath to tip the
ball past him into the left corner of the net.\r\nThe 1995 FA Charity Shield
(also known as the Littlewoods FA Charity Shield for sponsorship reasons) was the
73rd FA Charity Shield, an annual football match played between the winners of
the previous season's Premier League and FA Cup competitions. The match was
played on 13 August 1995 at Wembley Stadium and contested by Blackburn Rovers,
who had won the Premier League and FA Cup winners Everton. It was Blackburn's
second successive Charity Shield appearance, while Everton were appearing in
their eleventh and their first since 1987. Everton won the match 1\u20130
with a goal from Vinny Samways when he caught Tim Flowers off his line and
lifted the ball over him from the left of the penalty area and into the right
corner of the net. Dave Watson lifted the trophy for Everton.\r\nThe 1972 FA
Charity Shield was contested between Manchester City and Aston Villa.\r\nThe
1997 FA Charity Shield (known as the Littlewoods FA Charity Shield for
sponsorship reasons) was the 75th FA Charity Shield, an annual football match
played between the winners of the previous season's Premier League and
FA Cup competitions. The match was played on 3 August 1997 at Wembley Stadium
and contested by Manchester United, who had won the 1996\u201397 FA Premier League,
and Chelsea, who had won the 1996\u201397 FA Cup. Manchester United won the match
4\u20132 on penalties after the match had finished at 1\u20131 after 90 minutes.
\r\nThe 1956 FA Charity Shield was the 34th FA Charity Shield, an annual football
match held between the winners of the previous season's Football League and
FA Cup competitions. The match was contested by Manchester United, who had won
the 1955\u201356 Football League, and Manchester City, who had won the
1955\u201356 FA Cup, at Maine Road, Manchester, on 24 October 1956. Manchester
United won the match 1\u20130, Dennis Viollet scoring the winning goal.
Manchester United goalkeeper David Gaskell made his debut for the club during
the game, taking the place of injured goalkeeper Ray Wood, and, at the age of
16 years and 19 days, became the youngest player ever to play for the club.
\r\nThe 1937 FA Charity Shield was the 24th FA Charity Shield, a football match
between the winners of the previous season's First Division and FA Cup competitions.
The match was contested by league champions Manchester City and FA Cup winners
Sunderland, and was played at Maine Road, the home ground of Manchester City.
Manchester City won the game, 2\u20130.\r\nThe 2000 FA Charity Shield (also known
as the One 2 One FA Charity Shield for sponsorship reasons) was the
78th FA Charity Shield, an annual football match played between the winners
of the previous season's Premier League and FA Cup competitions. The match
was played between Manchester United, who won the 1999\u20132000 Premier League,
and Chelsea, who won the 1999\u20132000 FA Cup, and resulted in a 2\u20130 Chelsea win.
The goals were scored by Jimmy Floyd Hasselbaink and Mario Melchiot. Roy Keane
was sent off for a challenge on Gustavo Poyet and was the last person to be
sent off at the old Wembley Stadium.\r\nThe 2001 FA Charity Shield (also known
as the One 2 One FA Charity Shield for sponsorship reasons) was the 79th FA Charity Shield,
an annual football match played between the winners of the previous season's
Premier League and FA Cup. The match was contested between Liverpool, winners of
the 2000\u201301 FA Cup and Manchester United, who won the 2000\u201301 Premier
League on 12 August 2001. It was the first Shield match to be held at the
Millennium Stadium following the closure of Wembley Stadium for reconstruction.
\r\nAston Villa Football Club ( ; nicknamed Villa, The Villa, The Villans
and The Lions) is a professional football club in Aston, Birmingham, that plays
in the Championship, the second level of English football. Founded in 1874,
they have played at their current home ground, Villa Park, since 1897. Aston Villa
were one of the founder members of the Football League in 1888 and of the
Premier League in 1992.\r\nThe 1996 FA Charity Shield (also known as the
Littlewoods FA Charity Shield for sponsorship reasons) was the 74th FA Charity Shield,
an annual football match played between the winners of the previous season's Premier
League and FA Cup competitions. The match was played on 11 August 1996 at Wembley
Stadium and contested by Manchester United, who had won the Double of Premier League
and FA Cup in 1995\u201396, and Newcastle United, who had finished as runners-up
in the Premier League. Manchester United won the match 4\u20130 with goals from
Eric Cantona, Nicky Butt, David Beckham and Roy Keane."""
prediction = compiled_extract_knowledge_graph(context = text, question = "")
print(prediction.graph)
if __name__ == "__main__":
run()

View file

@@ -1,68 +0,0 @@
import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.primitives.example import Example
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.files.storage import LocalStorage
from cognee.shared.data_models import Answer
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.dataset import HotPotQA
from cognee.infrastructure.llm import get_llm_config
def train():
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts")
dspy.configure(rm = colbertv2_wiki17_abstracts)
def evaluate_answer(example, graph_prediction, trace = None):
llm_client = get_llm_client()
try:
answer_prediction = llm_client.create_structured_output(
text_input = example.question,
system_prompt = f"""Answer the question by looking at the provided knowledge graph.
Use only the graph to answer the question and be very brief.
This is the knowledge graph:
{graph_prediction.graph.model_dump(mode = "json")}""",
response_model = Answer,
)
except:
return False
return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
dsp.passage_match([example.answer], [answer_prediction.answer])
optimizer = BootstrapFewShot(metric = evaluate_answer)
dataset = HotPotQA(
train_seed = 1,
train_size = 16,
eval_seed = 2023,
dev_size = 8,
test_size = 0,
keep_details = True,
)
# Train
train_examples = [
Example(
base = None,
question = example.question,
context = "\r\n".join("".join(sentences) for sentences in example.context["sentences"]),
answer = example.answer,
) for example in dataset.train
]
trainset = [example.with_inputs("context", "question") for example in train_examples]
llm_config = get_llm_config()
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset)
# Save program
LocalStorage.ensure_directory_exists(get_absolute_path("./programs/extract_knowledge_graph"))
compiled_extract_knowledge_graph.save(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
if __name__ == "__main__":
train()

View file

@@ -0,0 +1,35 @@
from abc import ABC, abstractmethod
from typing import List, Dict, Union
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
class CogneeAbstractGraph(ABC):
"""
Abstract base class for representing a graph structure.
"""
@abstractmethod
def add_node(self, node: Node) -> None:
"""Add a node to the graph."""
pass
@abstractmethod
def add_edge(self, edge: Edge) -> None:
"""Add an edge to the graph."""
pass
@abstractmethod
def get_node(self, node_id: str) -> Node:
"""Retrieve a node by its ID."""
pass
@abstractmethod
def get_edges(self, node_id: str) -> List[Edge]:
"""Retrieve edges connected to a specific node."""
pass
@abstractmethod
async def project_graph_from_db(self, adapter: GraphDBInterface, directed: bool, dimension: int) -> None:
"""Project the graph structure from a database using the provided adapter."""
pass

View file

@@ -0,0 +1,91 @@
from typing import List, Dict, Union
from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
from cognee.modules.graph.cognee_graph.CogneeAbstractGraph import CogneeAbstractGraph
from cognee.infrastructure.databases.graph import get_graph_engine
class CogneeGraph(CogneeAbstractGraph):
"""
Concrete implementation of the AbstractGraph class for Cognee.
This class provides the functionality to manage nodes and edges,
and project a graph from a database using adapters.
"""
nodes: Dict[str, Node]
edges: List[Edge]
directed: bool
def __init__(self, directed: bool = True):
self.nodes = {}
self.edges = []
self.directed = directed
def add_node(self, node: Node) -> None:
if node.id not in self.nodes:
self.nodes[node.id] = node
else:
raise ValueError(f"Node with id {node.id} already exists.")
def add_edge(self, edge: Edge) -> None:
if edge not in self.edges:
self.edges.append(edge)
edge.node1.add_skeleton_edge(edge)
edge.node2.add_skeleton_edge(edge)
else:
raise ValueError(f"Edge {edge} already exists in the graph.")
def get_node(self, node_id: str) -> Node:
return self.nodes.get(node_id, None)
def get_edges(self, node_id: str) -> List[Edge]:
node = self.get_node(node_id)
if node:
return node.skeleton_edges
else:
raise ValueError(f"Node with id {node_id} does not exist.")
async def project_graph_from_db(self,
adapter: GraphDBInterface,
node_properties_to_project: List[str],
edge_properties_to_project: List[str],
directed = True,
node_dimension = 1,
edge_dimension = 1) -> None:
if node_dimension < 1 or edge_dimension < 1:
raise ValueError("Dimensions must be positive integers")
try:
nodes_data, edges_data = await adapter.get_graph_data()
if not nodes_data:
raise ValueError("No node data retrieved from the database.")
if not edges_data:
raise ValueError("No edge data retrieved from the database.")
for node_id, properties in nodes_data:
node_attributes = {key: properties.get(key) for key in node_properties_to_project}
self.add_node(Node(str(node_id), node_attributes, dimension=node_dimension))
for source_id, target_id, relationship_type, properties in edges_data:
source_node = self.get_node(str(source_id))
target_node = self.get_node(str(target_id))
if source_node and target_node:
edge_attributes = {key: properties.get(key) for key in edge_properties_to_project}
edge_attributes['relationship_type'] = relationship_type
edge = Edge(source_node, target_node, attributes=edge_attributes, directed=directed, dimension=edge_dimension)
self.add_edge(edge)
source_node.add_skeleton_edge(edge)
target_node.add_skeleton_edge(edge)
else:
raise ValueError(f"Edge references nonexistent nodes: {source_id} -> {target_id}")
except (ValueError, TypeError) as e:
print(f"Error projecting graph: {e}")
except Exception as ex:
print(f"Unexpected error: {ex}")

View file

@@ -0,0 +1,114 @@
import numpy as np
from typing import List, Dict, Optional, Any
class Node:
"""
Represents a node in a graph.
Attributes:
id (str): A unique identifier for the node.
attributes (Dict[str, Any]): A dictionary of attributes associated with the node.
skeleton_neighbours (List[Node]): Neighboring nodes in the original (skeleton) graph.
skeleton_edges (List[Edge]): Edges of the original (skeleton) graph incident to this node.
status (np.ndarray): Per-dimension alive flags for the node.
"""
id: str
attributes: Dict[str, Any]
skeleton_neighbours: List["Node"]
skeleton_edges: List["Edge"]
status: np.ndarray
def __init__(self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1):
if dimension <= 0:
raise ValueError("Dimension must be a positive integer")
self.id = node_id
self.attributes = attributes if attributes is not None else {}
self.skeleton_neighbours = []
self.skeleton_edges = []
self.status = np.ones(dimension, dtype=int)
def add_skeleton_neighbor(self, neighbor: "Node") -> None:
if neighbor not in self.skeleton_neighbours:
self.skeleton_neighbours.append(neighbor)
def remove_skeleton_neighbor(self, neighbor: "Node") -> None:
if neighbor in self.skeleton_neighbours:
self.skeleton_neighbours.remove(neighbor)
def add_skeleton_edge(self, edge: "Edge") -> None:
if edge not in self.skeleton_edges:
self.skeleton_edges.append(edge)
# Add neighbor
if edge.node1 == self:
self.add_skeleton_neighbor(edge.node2)
elif edge.node2 == self:
self.add_skeleton_neighbor(edge.node1)
def remove_skeleton_edge(self, edge: "Edge") -> None:
if edge in self.skeleton_edges:
self.skeleton_edges.remove(edge)
# Remove neighbor if no other edge connects them
neighbor = edge.node2 if edge.node1 == self else edge.node1
if all(e.node1 != neighbor and e.node2 != neighbor for e in self.skeleton_edges):
self.remove_skeleton_neighbor(neighbor)
def is_node_alive_in_dimension(self, dimension: int) -> bool:
if dimension < 0 or dimension >= len(self.status):
raise ValueError(f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}.")
return self.status[dimension] == 1
def __repr__(self) -> str:
return f"Node({self.id}, attributes={self.attributes})"
def __hash__(self) -> int:
return hash(self.id)
def __eq__(self, other: "Node") -> bool:
return isinstance(other, Node) and self.id == other.id
class Edge:
"""
Represents an edge in a graph, connecting two nodes.
Attributes:
node1 (Node): The starting node of the edge.
node2 (Node): The ending node of the edge.
attributes (Dict[str, Any]): A dictionary of attributes associated with the edge.
directed (bool): A flag indicating whether the edge is directed or undirected.
status (np.ndarray): Per-dimension alive flags for the edge.
"""
node1: "Node"
node2: "Node"
attributes: Dict[str, Any]
directed: bool
status: np.ndarray
def __init__(self, node1: "Node", node2: "Node", attributes: Optional[Dict[str, Any]] = None, directed: bool = True, dimension: int = 1):
if dimension <= 0:
raise ValueError("Dimensions must be a positive integer.")
self.node1 = node1
self.node2 = node2
self.attributes = attributes if attributes is not None else {}
self.directed = directed
self.status = np.ones(dimension, dtype=int)
def is_edge_alive_in_dimension(self, dimension: int) -> bool:
if dimension < 0 or dimension >= len(self.status):
raise ValueError(f"Dimension {dimension} is out of range. Valid range is 0 to {len(self.status) - 1}.")
return self.status[dimension] == 1
def __repr__(self) -> str:
direction = "->" if self.directed else "--"
return f"Edge({self.node1.id} {direction} {self.node2.id}, attributes={self.attributes})"
def __hash__(self) -> int:
if self.directed:
return hash((self.node1, self.node2))
else:
return hash(frozenset({self.node1, self.node2}))
def __eq__(self, other: "Edge") -> bool:
if not isinstance(other, Edge):
return False
if self.directed:
return self.node1 == other.node1 and self.node2 == other.node2
else:
return {self.node1, self.node2} == {other.node1, other.node2}

View file

@@ -28,7 +28,7 @@ class Class(DataPoint):
description: str
constructor_parameters: List[Variable]
extended_from_class: Optional["Class"] = None
has_methods: list["Function"]
has_methods: List["Function"]
_metadata = {
"index_fields": ["name"]
@@ -89,7 +89,8 @@ class SourceCodeGraph(DataPoint):
Operator,
Expression,
]]
Class.model_rebuild()
ClassInstance.model_rebuild()
Expression.model_rebuild()
FunctionCall.model_rebuild()
SourceCodeGraph.model_rebuild()
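The model_rebuild() calls are what resolve the quoted forward references: pydantic leaves annotations such as List["Function"] unevaluated until every model in the cycle exists. A self-contained illustration of the pattern, with plain BaseModel standing in for DataPoint:

from typing import List, Optional
from pydantic import BaseModel

class Class(BaseModel):
    name: str
    extended_from_class: Optional["Class"] = None  # self-reference via a string annotation
    has_methods: List["Function"]                  # forward reference to a later model

class Function(BaseModel):
    name: str

# Resolve the deferred annotations now that Function is defined.
Class.model_rebuild()

print(Class(name = "Shape", has_methods = [Function(name = "area")]))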

View file

@@ -0,0 +1,144 @@
import pytest
import numpy as np
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
def test_node_initialization():
"""Test that a Node is initialized correctly."""
node = Node("node1", {"attr1": "value1"}, dimension=2)
assert node.id == "node1"
assert node.attributes == {"attr1": "value1"}
assert len(node.status) == 2
assert np.all(node.status == 1)
def test_node_invalid_dimension():
"""Test that initializing a Node with a non-positive dimension raises an error."""
with pytest.raises(ValueError, match="Dimension must be a positive integer"):
Node("node1", dimension=0)
def test_add_skeleton_neighbor():
"""Test adding a neighbor to a node."""
node1 = Node("node1")
node2 = Node("node2")
node1.add_skeleton_neighbor(node2)
assert node2 in node1.skeleton_neighbours
def test_remove_skeleton_neighbor():
"""Test removing a neighbor from a node."""
node1 = Node("node1")
node2 = Node("node2")
node1.add_skeleton_neighbor(node2)
node1.remove_skeleton_neighbor(node2)
assert node2 not in node1.skeleton_neighbours
def test_add_skeleton_edge():
"""Test adding an edge updates both skeleton_edges and skeleton_neighbours."""
node1 = Node("node1")
node2 = Node("node2")
edge = Edge(node1, node2)
node1.add_skeleton_edge(edge)
assert edge in node1.skeleton_edges
assert node2 in node1.skeleton_neighbours
def test_remove_skeleton_edge():
"""Test removing an edge updates both skeleton_edges and skeleton_neighbours."""
node1 = Node("node1")
node2 = Node("node2")
edge = Edge(node1, node2)
node1.add_skeleton_edge(edge)
node1.remove_skeleton_edge(edge)
assert edge not in node1.skeleton_edges
assert node2 not in node1.skeleton_neighbours
def test_is_node_alive_in_dimension():
"""Test checking node's alive status in a specific dimension."""
node = Node("node1", dimension=2)
assert node.is_node_alive_in_dimension(1)
node.status[1] = 0
assert not node.is_node_alive_in_dimension(1)
def test_node_alive_invalid_dimension():
"""Test that checking alive status with an invalid dimension raises an error."""
node = Node("node1", dimension=1)
with pytest.raises(ValueError, match="Dimension 1 is out of range"):
node.is_node_alive_in_dimension(1)
def test_node_equality():
"""Test equality between nodes."""
node1 = Node("node1")
node2 = Node("node1")
assert node1 == node2
def test_node_hash():
"""Test hashing for Node."""
node = Node("node1")
assert hash(node) == hash("node1")
### Tests for Edge ###
def test_edge_initialization():
"""Test that an Edge is initialized correctly."""
node1 = Node("node1")
node2 = Node("node2")
edge = Edge(node1, node2, {"weight": 10}, directed=False, dimension=2)
assert edge.node1 == node1
assert edge.node2 == node2
assert edge.attributes == {"weight": 10}
assert edge.directed is False
assert len(edge.status) == 2
assert np.all(edge.status == 1)
def test_edge_invalid_dimension():
"""Test that initializing an Edge with a non-positive dimension raises an error."""
node1 = Node("node1")
node2 = Node("node2")
with pytest.raises(ValueError, match="Dimensions must be a positive integer."):
Edge(node1, node2, dimension=0)
def test_is_edge_alive_in_dimension():
"""Test checking edge's alive status in a specific dimension."""
node1 = Node("node1")
node2 = Node("node2")
edge = Edge(node1, node2, dimension=2)
assert edge.is_edge_alive_in_dimension(1)
edge.status[1] = 0
assert not edge.is_edge_alive_in_dimension(1)
def test_edge_alive_invalid_dimension():
"""Test that checking alive status with an invalid dimension raises an error."""
node1 = Node("node1")
node2 = Node("node2")
edge = Edge(node1, node2, dimension=1)
with pytest.raises(ValueError, match="Dimension 1 is out of range"):
edge.is_edge_alive_in_dimension(1)
def test_edge_equality_directed():
"""Test equality between directed edges."""
node1 = Node("node1")
node2 = Node("node2")
edge1 = Edge(node1, node2, directed=True)
edge2 = Edge(node1, node2, directed=True)
assert edge1 == edge2
def test_edge_equality_undirected():
"""Test equality between undirected edges."""
node1 = Node("node1")
node2 = Node("node2")
edge1 = Edge(node1, node2, directed=False)
edge2 = Edge(node2, node1, directed=False)
assert edge1 == edge2
def test_edge_hash_directed():
"""Test hashing for directed edges."""
node1 = Node("node1")
node2 = Node("node2")
edge = Edge(node1, node2, directed=True)
assert hash(edge) == hash((node1, node2))
def test_edge_hash_undirected():
"""Test hashing for undirected edges."""
node1 = Node("node1")
node2 = Node("node2")
edge = Edge(node1, node2, directed=False)
assert hash(edge) == hash(frozenset({node1, node2}))

View file

@@ -0,0 +1,79 @@
import pytest
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Node, Edge
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
@pytest.fixture
def setup_graph():
"""Fixture to initialize a CogneeGraph instance."""
return CogneeGraph()
def test_add_node_success(setup_graph):
"""Test successful addition of a node."""
graph = setup_graph
node = Node("node1")
graph.add_node(node)
assert graph.get_node("node1") == node
def test_add_duplicate_node(setup_graph):
"""Test adding a duplicate node raises an exception."""
graph = setup_graph
node = Node("node1")
graph.add_node(node)
with pytest.raises(ValueError, match="Node with id node1 already exists."):
graph.add_node(node)
def test_add_edge_success(setup_graph):
"""Test successful addition of an edge."""
graph = setup_graph
node1 = Node("node1")
node2 = Node("node2")
graph.add_node(node1)
graph.add_node(node2)
edge = Edge(node1, node2)
graph.add_edge(edge)
assert edge in graph.edges
assert edge in node1.skeleton_edges
assert edge in node2.skeleton_edges
def test_add_duplicate_edge(setup_graph):
"""Test adding a duplicate edge raises an exception."""
graph = setup_graph
node1 = Node("node1")
node2 = Node("node2")
graph.add_node(node1)
graph.add_node(node2)
edge = Edge(node1, node2)
graph.add_edge(edge)
with pytest.raises(ValueError, match="Edge .* already exists in the graph."):
graph.add_edge(edge)
def test_get_node_success(setup_graph):
"""Test retrieving an existing node."""
graph = setup_graph
node = Node("node1")
graph.add_node(node)
assert graph.get_node("node1") == node
def test_get_node_nonexistent(setup_graph):
"""Test retrieving a nonexistent node returns None."""
graph = setup_graph
assert graph.get_node("nonexistent") is None
def test_get_edges_success(setup_graph):
"""Test retrieving edges of a node."""
graph = setup_graph
node1 = Node("node1")
node2 = Node("node2")
graph.add_node(node1)
graph.add_node(node2)
edge = Edge(node1, node2)
graph.add_edge(edge)
assert edge in graph.get_edges("node1")
def test_get_edges_nonexistent_node(setup_graph):
"""Test retrieving edges for a nonexistent node raises an exception."""
graph = setup_graph
with pytest.raises(ValueError, match="Node with id nonexistent does not exist."):
graph.get_edges("nonexistent")

View file

@@ -0,0 +1,229 @@
import cognee
import asyncio
from cognee.api.v1.search import SearchType
job_position = """0:Senior Data Scientist (Machine Learning)
Company: TechNova Solutions
Location: San Francisco, CA
Job Description:
TechNova Solutions is seeking a Senior Data Scientist specializing in Machine Learning to join our dynamic analytics team. The ideal candidate will have a strong background in developing and deploying machine learning models, working with large datasets, and translating complex data into actionable insights.
Responsibilities:
Develop and implement advanced machine learning algorithms and models.
Analyze large, complex datasets to extract meaningful patterns and insights.
Collaborate with cross-functional teams to integrate predictive models into products.
Stay updated with the latest advancements in machine learning and data science.
Mentor junior data scientists and provide technical guidance.
Qualifications:
Master's or Ph.D. in Data Science, Computer Science, Statistics, or a related field.
5+ years of experience in data science and machine learning.
Proficient in Python, R, and SQL.
Experience with deep learning frameworks (e.g., TensorFlow, PyTorch).
Strong problem-solving skills and attention to detail.
Candidate CVs
"""
job_1 = """
CV 1: Relevant
Name: Dr. Emily Carter
Contact Information:
Email: emily.carter@example.com
Phone: (555) 123-4567
Summary:
Senior Data Scientist with over 8 years of experience in machine learning and predictive analytics. Expertise in developing advanced algorithms and deploying scalable models in production environments.
Education:
Ph.D. in Computer Science, Stanford University (2014)
B.S. in Mathematics, University of California, Berkeley (2010)
Experience:
Senior Data Scientist, InnovateAI Labs (2016 – Present)
Led a team in developing machine learning models for natural language processing applications.
Implemented deep learning algorithms that improved prediction accuracy by 25%.
Collaborated with cross-functional teams to integrate models into cloud-based platforms.
Data Scientist, DataWave Analytics (2014 – 2016)
Developed predictive models for customer segmentation and churn analysis.
Analyzed large datasets using Hadoop and Spark frameworks.
Skills:
Programming Languages: Python, R, SQL
Machine Learning: TensorFlow, Keras, Scikit-Learn
Big Data Technologies: Hadoop, Spark
Data Visualization: Tableau, Matplotlib
"""
job_2 = """
CV 2: Relevant
Name: Michael Rodriguez
Contact Information:
Email: michael.rodriguez@example.com
Phone: (555) 234-5678
Summary:
Data Scientist with a strong background in machine learning and statistical modeling. Skilled in handling large datasets and translating data into actionable business insights.
Education:
M.S. in Data Science, Carnegie Mellon University (2013)
B.S. in Computer Science, University of Michigan (2011)
Experience:
Senior Data Scientist, Alpha Analytics (2017 – Present)
Developed machine learning models to optimize marketing strategies.
Reduced customer acquisition cost by 15% through predictive modeling.
Data Scientist, TechInsights (2013 – 2017)
Analyzed user behavior data to improve product features.
Implemented A/B testing frameworks to evaluate product changes.
Skills:
Programming Languages: Python, Java, SQL
Machine Learning: Scikit-Learn, XGBoost
Data Visualization: Seaborn, Plotly
Databases: MySQL, MongoDB
"""
job_3 = """
CV 3: Relevant
Name: Sarah Nguyen
Contact Information:
Email: sarah.nguyen@example.com
Phone: (555) 345-6789
Summary:
Data Scientist specializing in machine learning with 6 years of experience. Passionate about leveraging data to drive business solutions and improve product performance.
Education:
M.S. in Statistics, University of Washington (2014)
B.S. in Applied Mathematics, University of Texas at Austin (2012)
Experience:
Data Scientist, QuantumTech (2016 – Present)
Designed and implemented machine learning algorithms for financial forecasting.
Improved model efficiency by 20% through algorithm optimization.
Junior Data Scientist, DataCore Solutions (2014 – 2016)
Assisted in developing predictive models for supply chain optimization.
Conducted data cleaning and preprocessing on large datasets.
Skills:
Programming Languages: Python, R
Machine Learning Frameworks: PyTorch, Scikit-Learn
Statistical Analysis: SAS, SPSS
Cloud Platforms: AWS, Azure
"""
job_4 = """
CV 4: Not Relevant
Name: David Thompson
Contact Information:
Email: david.thompson@example.com
Phone: (555) 456-7890
Summary:
Creative Graphic Designer with over 8 years of experience in visual design and branding. Proficient in Adobe Creative Suite and passionate about creating compelling visuals.
Education:
B.F.A. in Graphic Design, Rhode Island School of Design (2012)
Experience:
Senior Graphic Designer, CreativeWorks Agency (2015 – Present)
Led design projects for clients in various industries.
Created branding materials that increased client engagement by 30%.
Graphic Designer, Visual Innovations (2012 – 2015)
Designed marketing collateral, including brochures, logos, and websites.
Collaborated with the marketing team to develop cohesive brand strategies.
Skills:
Design Software: Adobe Photoshop, Illustrator, InDesign
Web Design: HTML, CSS
Specialties: Branding and Identity, Typography
"""
job_5 = """
CV 5: Not Relevant
Name: Jessica Miller
Contact Information:
Email: jessica.miller@example.com
Phone: (555) 567-8901
Summary:
Experienced Sales Manager with a strong track record in driving sales growth and building high-performing teams. Excellent communication and leadership skills.
Education:
B.A. in Business Administration, University of Southern California (2010)
Experience:
Sales Manager, Global Enterprises (2015 – Present)
Managed a sales team of 15 members, achieving a 20% increase in annual revenue.
Developed sales strategies that expanded customer base by 25%.
Sales Representative, Market Leaders Inc. (2010 – 2015)
Consistently exceeded sales targets and received the 'Top Salesperson' award in 2013.
Skills:
Sales Strategy and Planning
Team Leadership and Development
CRM Software: Salesforce, Zoho
Negotiation and Relationship Building
"""
async def main(enable_steps):
# Step 1: Reset data and system state
if enable_steps.get("prune_data"):
await cognee.prune.prune_data()
print("Data pruned.")
if enable_steps.get("prune_system"):
await cognee.prune.prune_system(metadata=True)
print("System pruned.")
# Step 2: Add text
if enable_steps.get("add_text"):
text_list = [job_position, job_1, job_2, job_3, job_4, job_5]
for text in text_list:
await cognee.add(text)
print(f"Added text: {text[:35]}...")
# Step 3: Create knowledge graph
if enable_steps.get("cognify"):
await cognee.cognify()
print("Knowledge graph created.")
# Step 4: Query insights
if enable_steps.get("search_insights"):
search_results = await cognee.search(
SearchType.INSIGHTS,
{'query': 'Which applicant has the most relevant experience in data science?'}
)
print("Search results:")
for result_text in search_results:
print(result_text)
if __name__ == '__main__':
# Flags to enable/disable steps
steps_to_enable = {
"prune_data": True,
"prune_system": True,
"add_text": True,
"cognify": True,
"search_insights": True
}
asyncio.run(main(steps_to_enable))

poetry.lock generated
View file

@@ -876,23 +876,6 @@ files = [
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
]
[[package]]
name = "colorlog"
version = "6.9.0"
description = "Add colours to the output of Python's logging module."
optional = false
python-versions = ">=3.6"
files = [
{file = "colorlog-6.9.0-py3-none-any.whl", hash = "sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff"},
{file = "colorlog-6.9.0.tar.gz", hash = "sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2"},
]
[package.dependencies]
colorama = {version = "*", markers = "sys_platform == \"win32\""}
[package.extras]
development = ["black", "flake8", "mypy", "pytest", "types-colorama"]
[[package]]
name = "comm"
version = "0.2.2"
@@ -1292,17 +1275,6 @@ files = [
graph = ["objgraph (>=1.7.2)"]
profile = ["gprof2dot (>=2022.7.29)"]
[[package]]
name = "diskcache"
version = "5.6.3"
description = "Disk Cache -- Disk and file backed persistent cache."
optional = false
python-versions = ">=3"
files = [
{file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"},
{file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"},
]
[[package]]
name = "distro"
version = "1.9.0"
@@ -1414,55 +1386,6 @@ files = [
{file = "docstring_parser-0.16.tar.gz", hash = "sha256:538beabd0af1e2db0146b6bd3caa526c35a34d61af9fd2887f3a8a27a739aa6e"},
]
[[package]]
name = "dspy"
version = "2.5.25"
description = "DSPy"
optional = false
python-versions = ">=3.9"
files = [
{file = "dspy-2.5.25-py3-none-any.whl", hash = "sha256:6c65dc49e1377f71fbe68dd90d851c7181898b46c36affd69de29cf0b125bb16"},
{file = "dspy-2.5.25.tar.gz", hash = "sha256:e35f391cd57ab9a822a95b91fe067eb3721b7f61e263913d9ae7b1df54f7fdd9"},
]
[package.dependencies]
backoff = "*"
datasets = "*"
diskcache = "*"
httpx = "*"
joblib = ">=1.3,<2.0"
json-repair = "*"
litellm = "<=1.49.1"
magicattr = ">=0.1.6,<0.2.0"
openai = "*"
optuna = "*"
pandas = "*"
pydantic = ">=2.0,<3.0"
regex = "*"
requests = "*"
structlog = "*"
tenacity = ">=8.2.3"
tqdm = "*"
ujson = "*"
[package.extras]
chromadb = ["chromadb (>=0.4.14,<0.5.0)"]
faiss-cpu = ["faiss-cpu", "sentence-transformers"]
fastembed = ["fastembed"]
google-vertex-ai = ["google-cloud-aiplatform (==1.43.0)"]
groq = ["groq (>=0.8.0,<0.9.0)"]
lancedb = ["lancedb (>=0.11.0,<0.12.0)"]
langfuse = ["langfuse (>=2.36.1,<2.37.0)"]
marqo = ["marqo (>=3.1.0,<3.2.0)"]
milvus = ["pymilvus (>=2.3.7,<2.4.0)"]
mongodb = ["pymongo (>=3.12.0,<3.13.0)"]
myscale = ["clickhouse-connect"]
pgvector = ["pgvector (>=0.2.5,<0.3.0)", "psycopg2 (>=2.9.9,<2.10.0)"]
pinecone = ["pinecone-client (>=2.2.4,<2.3.0)"]
qdrant = ["fastembed", "qdrant-client"]
snowflake = ["snowflake-snowpark-python"]
weaviate = ["weaviate-client (>=4.6.5,<4.7.0)"]
[[package]]
name = "email-validator"
version = "2.2.0"
@@ -2742,17 +2665,6 @@ files = [
{file = "jsmin-3.0.1.tar.gz", hash = "sha256:c0959a121ef94542e807a674142606f7e90214a2b3d1eb17300244bbb5cc2bfc"},
]
[[package]]
name = "json-repair"
version = "0.30.1"
description = "A package to repair broken json strings"
optional = false
python-versions = ">=3.9"
files = [
{file = "json_repair-0.30.1-py3-none-any.whl", hash = "sha256:6fa8a05d246e282df2f812fa542bd837d671d7774eaae11191aabaac97d41e33"},
{file = "json_repair-0.30.1.tar.gz", hash = "sha256:5f075c4e3b098d78fb6cd60c34aec07a4517f14e9d423ad5364214b0e870e218"},
]
[[package]]
name = "json5"
version = "0.9.27"
@@ -3334,16 +3246,6 @@ tokenizers = "*"
extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"]
proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=42.0.5,<43.0.0)", "fastapi (>=0.111.0,<0.112.0)", "fastapi-sso (>=0.10.0,<0.11.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.9,<0.0.10)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"]
[[package]]
name = "magicattr"
version = "0.1.6"
description = "A getattr and setattr that works on nested objects, lists, dicts, and any combination thereof without resorting to eval"
optional = false
python-versions = "*"
files = [
{file = "magicattr-0.1.6-py2.py3-none-any.whl", hash = "sha256:d96b18ee45b5ee83b09c17e15d3459a64de62d538808c2f71182777dd9dbbbdf"},
]
[[package]]
name = "makefun"
version = "1.15.6"
@@ -4256,33 +4158,6 @@ typing-extensions = ">=4.11,<5"
[package.extras]
datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]
[[package]]
name = "optuna"
version = "4.0.0"
description = "A hyperparameter optimization framework"
optional = false
python-versions = ">=3.7"
files = [
{file = "optuna-4.0.0-py3-none-any.whl", hash = "sha256:a825c32d13f6085bcb2229b2724a5078f2e0f61a7533e800e580ce41a8c6c10d"},
{file = "optuna-4.0.0.tar.gz", hash = "sha256:844949f09e2a7353ab414e9cfd783cf0a647a65fc32a7236212ed6a37fe08973"},
]
[package.dependencies]
alembic = ">=1.5.0"
colorlog = "*"
numpy = "*"
packaging = ">=20.0"
PyYAML = "*"
sqlalchemy = ">=1.3.0"
tqdm = "*"
[package.extras]
benchmark = ["asv (>=0.5.0)", "botorch", "cma", "virtualenv"]
checking = ["black", "blackdoc", "flake8", "isort", "mypy", "mypy-boto3-s3", "types-PyYAML", "types-redis", "types-setuptools", "types-tqdm", "typing-extensions (>=3.10.0.0)"]
document = ["ase", "cmaes (>=0.10.0)", "fvcore", "kaleido", "lightgbm", "matplotlib (!=3.6.0)", "pandas", "pillow", "plotly (>=4.9.0)", "scikit-learn", "sphinx", "sphinx-copybutton", "sphinx-gallery", "sphinx-rtd-theme (>=1.2.0)", "torch", "torchvision"]
optional = ["boto3", "cmaes (>=0.10.0)", "google-cloud-storage", "matplotlib (!=3.6.0)", "pandas", "plotly (>=4.9.0)", "redis", "scikit-learn (>=0.24.2)", "scipy", "torch"]
test = ["coverage", "fakeredis[lua]", "kaleido", "moto", "pytest", "scipy (>=1.9.2)", "torch"]
[[package]]
name = "orjson"
version = "3.10.11"
@@ -6193,6 +6068,11 @@ files = [
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
{file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"},
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"},
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"},
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"},
{file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"},
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
{file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
@@ -6669,23 +6549,6 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""
[package.extras]
full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"]
[[package]]
name = "structlog"
version = "24.4.0"
description = "Structured Logging for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "structlog-24.4.0-py3-none-any.whl", hash = "sha256:597f61e80a91cc0749a9fd2a098ed76715a1c8a01f73e336b746504d1aad7610"},
{file = "structlog-24.4.0.tar.gz", hash = "sha256:b27bfecede327a6d2da5fbc96bd859f114ecc398a6389d664f62085ee7ae6fc4"},
]
[package.extras]
dev = ["freezegun (>=0.2.8)", "mypy (>=1.4)", "pretend", "pytest (>=6.0)", "pytest-asyncio (>=0.17)", "rich", "simplejson", "twisted"]
docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-mermaid", "sphinxext-opengraph", "twisted"]
tests = ["freezegun (>=0.2.8)", "pretend", "pytest (>=6.0)", "pytest-asyncio (>=0.17)", "simplejson"]
typing = ["mypy (>=1.4)", "rich", "twisted"]
[[package]]
name = "tenacity"
version = "9.0.0"
@@ -7094,93 +6957,6 @@ files = [
{file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"},
]
[[package]]
name = "ujson"
version = "5.10.0"
description = "Ultra fast JSON encoder and decoder for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"},
{file = "ujson-5.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:348898dd702fc1c4f1051bc3aacbf894caa0927fe2c53e68679c073375f732cf"},
{file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22cffecf73391e8abd65ef5f4e4dd523162a3399d5e84faa6aebbf9583df86d6"},
{file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26b0e2d2366543c1bb4fbd457446f00b0187a2bddf93148ac2da07a53fe51569"},
{file = "ujson-5.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:caf270c6dba1be7a41125cd1e4fc7ba384bf564650beef0df2dd21a00b7f5770"},
{file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a245d59f2ffe750446292b0094244df163c3dc96b3ce152a2c837a44e7cda9d1"},
{file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:94a87f6e151c5f483d7d54ceef83b45d3a9cca7a9cb453dbdbb3f5a6f64033f5"},
{file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:29b443c4c0a113bcbb792c88bea67b675c7ca3ca80c3474784e08bba01c18d51"},
{file = "ujson-5.10.0-cp310-cp310-win32.whl", hash = "sha256:c18610b9ccd2874950faf474692deee4223a994251bc0a083c114671b64e6518"},
{file = "ujson-5.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:924f7318c31874d6bb44d9ee1900167ca32aa9b69389b98ecbde34c1698a250f"},
{file = "ujson-5.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a5b366812c90e69d0f379a53648be10a5db38f9d4ad212b60af00bd4048d0f00"},
{file = "ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:502bf475781e8167f0f9d0e41cd32879d120a524b22358e7f205294224c71126"},
{file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b91b5d0d9d283e085e821651184a647699430705b15bf274c7896f23fe9c9d8"},
{file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:129e39af3a6d85b9c26d5577169c21d53821d8cf68e079060602e861c6e5da1b"},
{file = "ujson-5.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f77b74475c462cb8b88680471193064d3e715c7c6074b1c8c412cb526466efe9"},
{file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ec0ca8c415e81aa4123501fee7f761abf4b7f386aad348501a26940beb1860f"},
{file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab13a2a9e0b2865a6c6db9271f4b46af1c7476bfd51af1f64585e919b7c07fd4"},
{file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57aaf98b92d72fc70886b5a0e1a1ca52c2320377360341715dd3933a18e827b1"},
{file = "ujson-5.10.0-cp311-cp311-win32.whl", hash = "sha256:2987713a490ceb27edff77fb184ed09acdc565db700ee852823c3dc3cffe455f"},
{file = "ujson-5.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f00ea7e00447918ee0eff2422c4add4c5752b1b60e88fcb3c067d4a21049a720"},
{file = "ujson-5.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98ba15d8cbc481ce55695beee9f063189dce91a4b08bc1d03e7f0152cd4bbdd5"},
{file = "ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9d2edbf1556e4f56e50fab7d8ff993dbad7f54bac68eacdd27a8f55f433578e"},
{file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6627029ae4f52d0e1a2451768c2c37c0c814ffc04f796eb36244cf16b8e57043"},
{file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ccb77b3e40b151e20519c6ae6d89bfe3f4c14e8e210d910287f778368bb3d1"},
{file = "ujson-5.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3caf9cd64abfeb11a3b661329085c5e167abbe15256b3b68cb5d914ba7396f3"},
{file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6e32abdce572e3a8c3d02c886c704a38a1b015a1fb858004e03d20ca7cecbb21"},
{file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a65b6af4d903103ee7b6f4f5b85f1bfd0c90ba4eeac6421aae436c9988aa64a2"},
{file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:604a046d966457b6cdcacc5aa2ec5314f0e8c42bae52842c1e6fa02ea4bda42e"},
{file = "ujson-5.10.0-cp312-cp312-win32.whl", hash = "sha256:6dea1c8b4fc921bf78a8ff00bbd2bfe166345f5536c510671bccececb187c80e"},
{file = "ujson-5.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:38665e7d8290188b1e0d57d584eb8110951a9591363316dd41cf8686ab1d0abc"},
{file = "ujson-5.10.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:618efd84dc1acbd6bff8eaa736bb6c074bfa8b8a98f55b61c38d4ca2c1f7f287"},
{file = "ujson-5.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38d5d36b4aedfe81dfe251f76c0467399d575d1395a1755de391e58985ab1c2e"},
{file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67079b1f9fb29ed9a2914acf4ef6c02844b3153913eb735d4bf287ee1db6e557"},
{file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0e0ceeb8fe2468c70ec0c37b439dd554e2aa539a8a56365fd761edb418988"},
{file = "ujson-5.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59e02cd37bc7c44d587a0ba45347cc815fb7a5fe48de16bf05caa5f7d0d2e816"},
{file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a890b706b64e0065f02577bf6d8ca3b66c11a5e81fb75d757233a38c07a1f20"},
{file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:621e34b4632c740ecb491efc7f1fcb4f74b48ddb55e65221995e74e2d00bbff0"},
{file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9500e61fce0cfc86168b248104e954fead61f9be213087153d272e817ec7b4f"},
{file = "ujson-5.10.0-cp313-cp313-win32.whl", hash = "sha256:4c4fc16f11ac1612f05b6f5781b384716719547e142cfd67b65d035bd85af165"},
{file = "ujson-5.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:4573fd1695932d4f619928fd09d5d03d917274381649ade4328091ceca175539"},
{file = "ujson-5.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a984a3131da7f07563057db1c3020b1350a3e27a8ec46ccbfbf21e5928a43050"},
{file = "ujson-5.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73814cd1b9db6fc3270e9d8fe3b19f9f89e78ee9d71e8bd6c9a626aeaeaf16bd"},
{file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e1591ed9376e5eddda202ec229eddc56c612b61ac6ad07f96b91460bb6c2fb"},
{file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c75269f8205b2690db4572a4a36fe47cd1338e4368bc73a7a0e48789e2e35a"},
{file = "ujson-5.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7223f41e5bf1f919cd8d073e35b229295aa8e0f7b5de07ed1c8fddac63a6bc5d"},
{file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc2fd6b3067c0782e7002ac3b38cf48608ee6366ff176bbd02cf969c9c20fe"},
{file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:232cc85f8ee3c454c115455195a205074a56ff42608fd6b942aa4c378ac14dd7"},
{file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cc6139531f13148055d691e442e4bc6601f6dba1e6d521b1585d4788ab0bfad4"},
{file = "ujson-5.10.0-cp38-cp38-win32.whl", hash = "sha256:e7ce306a42b6b93ca47ac4a3b96683ca554f6d35dd8adc5acfcd55096c8dfcb8"},
{file = "ujson-5.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:e82d4bb2138ab05e18f089a83b6564fee28048771eb63cdecf4b9b549de8a2cc"},
{file = "ujson-5.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dfef2814c6b3291c3c5f10065f745a1307d86019dbd7ea50e83504950136ed5b"},
{file = "ujson-5.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4734ee0745d5928d0ba3a213647f1c4a74a2a28edc6d27b2d6d5bd9fa4319e27"},
{file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ebb01bd865fdea43da56254a3930a413f0c5590372a1241514abae8aa7c76"},
{file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee5e97c2496874acbf1d3e37b521dd1f307349ed955e62d1d2f05382bc36dd5"},
{file = "ujson-5.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7490655a2272a2d0b072ef16b0b58ee462f4973a8f6bbe64917ce5e0a256f9c0"},
{file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba17799fcddaddf5c1f75a4ba3fd6441f6a4f1e9173f8a786b42450851bd74f1"},
{file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2aff2985cef314f21d0fecc56027505804bc78802c0121343874741650a4d3d1"},
{file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ad88ac75c432674d05b61184178635d44901eb749786c8eb08c102330e6e8996"},
{file = "ujson-5.10.0-cp39-cp39-win32.whl", hash = "sha256:2544912a71da4ff8c4f7ab5606f947d7299971bdd25a45e008e467ca638d13c9"},
{file = "ujson-5.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ff201d62b1b177a46f113bb43ad300b424b7847f9c5d38b1b4ad8f75d4a282a"},
{file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b6fee72fa77dc172a28f21693f64d93166534c263adb3f96c413ccc85ef6e64"},
{file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:61d0af13a9af01d9f26d2331ce49bb5ac1fb9c814964018ac8df605b5422dcb3"},
{file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb24f0bdd899d368b715c9e6664166cf694d1e57be73f17759573a6986dd95a"},
{file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbd8fd427f57a03cff3ad6574b5e299131585d9727c8c366da4624a9069ed746"},
{file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beeaf1c48e32f07d8820c705ff8e645f8afa690cca1544adba4ebfa067efdc88"},
{file = "ujson-5.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:baed37ea46d756aca2955e99525cc02d9181de67f25515c468856c38d52b5f3b"},
{file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7663960f08cd5a2bb152f5ee3992e1af7690a64c0e26d31ba7b3ff5b2ee66337"},
{file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8640fb4072d36b08e95a3a380ba65779d356b2fee8696afeb7794cf0902d0a1"},
{file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78778a3aa7aafb11e7ddca4e29f46bc5139131037ad628cc10936764282d6753"},
{file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0111b27f2d5c820e7f2dbad7d48e3338c824e7ac4d2a12da3dc6061cc39c8e6"},
{file = "ujson-5.10.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c66962ca7565605b355a9ed478292da628b8f18c0f2793021ca4425abf8b01e5"},
{file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba43cc34cce49cf2d4bc76401a754a81202d8aa926d0e2b79f0ee258cb15d3a4"},
{file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac56eb983edce27e7f51d05bc8dd820586c6e6be1c5216a6809b0c668bb312b8"},
{file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44bd4b23a0e723bf8b10628288c2c7c335161d6840013d4d5de20e48551773b"},
{file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c10f4654e5326ec14a46bcdeb2b685d4ada6911050aa8baaf3501e57024b804"},
{file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de4971a89a762398006e844ae394bd46991f7c385d7a6a3b93ba229e6dac17e"},
{file = "ujson-5.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1402f0564a97d2a52310ae10a64d25bcef94f8dd643fcf5d310219d915484f7"},
{file = "ujson-5.10.0.tar.gz", hash = "sha256:b3cd8f3c5d8c7738257f1018880444f7b7d9b66232c64649f562d7ba86ad4bc1"},
]
[[package]]
name = "uri-template"
version = "1.3.0"
@@ -7746,4 +7522,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.0,<3.12"
content-hash = "fb09733ff7a70fb91c5f72ff0c8a8137b857557930a7aa025aad3154de4d8ceb"
content-hash = "57a154a7bbdd990e0fbe2313fa24c412dad98e47b9cd05e41bf378a3f597713f"

View file

@@ -27,7 +27,6 @@ uvicorn = "0.22.0"
requests = "2.32.3"
aiohttp = "3.10.10"
typing_extensions = "4.12.2"
dspy = "2.5.25"
nest_asyncio = "1.6.0"
numpy = "1.26.4"
datasets = "3.1.0"