def format_triplets(edges):
    """Render graph edges as human-readable ``Node1 / Edge / Node2`` text blocks.

    Args:
        edges: Iterable of edge objects. Each edge must expose ``node1``,
            ``node2`` and ``attributes``, and each node must expose
            ``attributes``. Every ``attributes`` value is read as a mapping;
            ``None`` is treated as an empty mapping.

    Returns:
        One string containing a block per edge, in input order. Each block
        lists the node/edge attribute dicts with ``None``-valued entries
        dropped and ends with ``"\\n\\n\\n"`` (i.e. two blank lines between
        consecutive blocks). An empty ``edges`` yields ``""``.
    """

    def _without_none(attributes):
        """Return a copy of *attributes* without the entries whose value is None."""
        # `or {}` keeps a missing (None) attribute mapping from raising on .items().
        return {
            key: value
            for key, value in (attributes or {}).items()
            if value is not None
        }

    return "".join(
        f"Node1: {_without_none(edge.node1.attributes)}\n"
        f"Edge: {_without_none(edge.attributes)}\n"
        f"Node2: {_without_none(edge.node2.attributes)}\n\n\n"
        for edge in edges
    )
lis ) ############################################# This part is a quick fix til we don't fix the vector db inconsistency - results_dict = delete_duplicated_vector_db_elements(collections, results)# :TODO: Change when vector db is fixed + node_distances = delete_duplicated_vector_db_elements(collections, results)# :TODO: Change when vector db is fixed # results_dict = {collection: result for collection, result in zip(collections, results)} ############################################## @@ -63,15 +102,19 @@ async def run_two_step_retriever(query: str, user, community_filter = []) -> lis await memory_fragment.project_graph_from_db(graph_engine, node_properties_to_project=['id', - 'community'], + 'description', + 'name', + 'type', + 'text'], edge_properties_to_project=['id', - 'relationship_name'], - directed=True, - node_dimension=1, - edge_dimension=1, - memory_fragment_filter=[]) + 'relationship_name']) - print() + await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances) + + await memory_fragment.map_vector_distances_to_graph_edges(vector_engine, query)# :TODO: This should be coming from vector db + + results = await memory_fragment.calculate_top_triplet_importances(k=5) - raise(NotImplementedError) \ No newline at end of file + print(format_triplets(results)) + print(f'Query was the following:{query}' ) diff --git a/examples/python/dynamic_steps_example.py b/examples/python/dynamic_steps_example.py index 11c2f1110..f4aa0aaf7 100644 --- a/examples/python/dynamic_steps_example.py +++ b/examples/python/dynamic_steps_example.py @@ -2,32 +2,6 @@ import cognee import asyncio from cognee.pipelines.retriever.two_steps_retriever import two_step_retriever -job_position = """0:Senior Data Scientist (Machine Learning) - -Company: TechNova Solutions -Location: San Francisco, CA - -Job Description: - -TechNova Solutions is seeking a Senior Data Scientist specializing in Machine Learning to join our dynamic analytics team. 
The ideal candidate will have a strong background in developing and deploying machine learning models, working with large datasets, and translating complex data into actionable insights. - -Responsibilities: - -Develop and implement advanced machine learning algorithms and models. -Analyze large, complex datasets to extract meaningful patterns and insights. -Collaborate with cross-functional teams to integrate predictive models into products. -Stay updated with the latest advancements in machine learning and data science. -Mentor junior data scientists and provide technical guidance. -Qualifications: - -Master’s or Ph.D. in Data Science, Computer Science, Statistics, or a related field. -5+ years of experience in data science and machine learning. -Proficient in Python, R, and SQL. -Experience with deep learning frameworks (e.g., TensorFlow, PyTorch). -Strong problem-solving skills and attention to detail. -Candidate CVs -""" - job_1 = """ CV 1: Relevant Name: Dr. Emily Carter @@ -195,7 +169,7 @@ async def main(enable_steps): # Step 2: Add text if enable_steps.get("add_text"): - text_list = [job_position, job_1, job_2, job_3, job_4, job_5] + text_list = [job_1, job_2, job_3, job_4, job_5] for text in text_list: await cognee.add(text) print(f"Added text: {text[:35]}...") @@ -207,22 +181,20 @@ async def main(enable_steps): # Step 4: Query insights if enable_steps.get("retriever"): - search_results = await two_step_retriever( - {'query': 'Which applicant has the most relevant experience in data science?'} - ) - print("Search results:") - for result_text in search_results: - print(result_text) + await two_step_retriever('Who has Phd?') if __name__ == '__main__': # Flags to enable/disable steps + + rebuild_kg = False + retrieve = True steps_to_enable = { - "prune_data": False, - "prune_system": False, - "add_text": False, - "cognify": False, - "retriever": True + "prune_data": rebuild_kg, + "prune_system": rebuild_kg, + "add_text": rebuild_kg, + "cognify": rebuild_kg, + 
"retriever": retrieve } asyncio.run(main(steps_to_enable))