From 8924ce0298be5add643d78b475c447ef1cb99c34 Mon Sep 17 00:00:00 2001 From: lxobr <122801072+lxobr@users.noreply.github.com> Date: Tue, 22 Apr 2025 17:28:56 +0200 Subject: [PATCH] Adds Nodefilter functionality for the SF demo (updated) --- .../llm/prompts/edge_association_prompt.txt | 63 +++++++++---------- .../node_set_edge_association.py | 2 +- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/cognee/infrastructure/llm/prompts/edge_association_prompt.txt b/cognee/infrastructure/llm/prompts/edge_association_prompt.txt index b3a758e47..123a9bfe9 100644 --- a/cognee/infrastructure/llm/prompts/edge_association_prompt.txt +++ b/cognee/infrastructure/llm/prompts/edge_association_prompt.txt @@ -1,49 +1,46 @@ -You are an expert graph augmentation assistant. +You are an expert knowledge graph augmentation assistant specializing in identifying new edges that contain **semantic and conceptual associations**. -Input: -{{subgraph_description}} +## Input +Input will contain a graph description with nodes and edges: +- **Nodes**: JSON array of objects: + - `name`: unique node label + - `content`: full text or description +- **Edges**: JSON array of objects: + - `source`: name of the first node + - `target`: name of the second node + - `relationship_name`: existing link label -Where: -- “Nodes” is a JSON array of objects, each with: - - “name”: the node’s unique label - - “content”: the full text or description -- “Edges” is a JSON array of objects, each with: - - “source”: name of the first node - - “target”: name of the second node - - “relationship_name”: the existing link label +## Task +Look for meaningful associations between nodes that aren’t yet connected. Identify cases where two things are commonly linked, used together, work together, depend on each other, or naturally belong to the same category or group. These connections can describe how things interact, support each other, or are understood as related in real-world contexts. -Task: -Infer and propose **only new** semantic or conceptual association edges—no structural, containment, usage, or metadata links (e.g. contains, is_part_of, belongs_to_set, uses, applies, leverages, developed, etc.). +The association doesn’t have to be obvious at first glance—consider how the concepts, objects, or entities might be connected based on their purpose, function, or role. The direction of each edge should clearly show how one points to, supports, or is connected to the other in a way that makes practical sense. -Rules: -1. Do **not** re‑output any existing edge (in either direction). -2. Do **not** output self‑loops (source == target). -3. Only generate associations of these general types: - - **Semantic or syntactic siblings** (e.g. apple ↔ pear) - - **Domain peers** (e.g. Audi ↔ BMW) - - **Conceptual or functional complements** (e.g. pen ↔ paper) +Avoid technical, structural, or generic links like uses, contains, or is_part_of. Focus on connections that describe how things go together or relate in context. -**Strict Exclusions** -- Do **not** create any edge if the pair is already connected by any existing relationship (in either direction). -- Do **not** output edges whose `relationship_name` is structural or metadata (e.g. contains, is_part_of, belongs_to_set, applies, uses, developed, redefining, leverages, offers_service, etc.). -- Do **not** output self‑loops (source == target). +## Rules + 1. Propose only directed associations where direction adds meaning. + 2. Do not repeat existing edges in the same direction. + 3. Do not create self-loops (source == target). + 4. Only link nodes when there is a clear, real-world connection based on their content. + 5. Keep relationship_name concise and in snake_case, describing the nature of the association. -For each new edge, output: -- “source”: the first node’s name -- “target”: the second node’s name -- “relationship_name”: a concise snake_case label (e.g. “fruit_siblings”, “luxury_peers”) -- “reason”: a brief justification (e.g. “both are fruits”, “both are European luxury car brands”) -Output **only** valid JSON in this schema—no extra text: +## Strict Exclusions +- Skip pairs already connected by any edge in the **same direction**. +- Do **not** propose structural, containment, usage, or metadata associations. +- No self-loops. + +## Output +Return **only** valid JSON in this schema: + ```json { "new_edges": [ { "source": "NodeA", "target": "NodeB", - "relationship_name": "your_label_here", - "reason": "your_reason_here" + "relationship_name": "concise_snake_case_label", + "reason": "brief justification explaining the association and its direction" } - … ] } diff --git a/cognee/tasks/experimental_tasks/node_set_edge_association.py b/cognee/tasks/experimental_tasks/node_set_edge_association.py index ce0feb40a..5cfa893c8 100644 --- a/cognee/tasks/experimental_tasks/node_set_edge_association.py +++ b/cognee/tasks/experimental_tasks/node_set_edge_association.py @@ -51,7 +51,7 @@ async def node_set_edge_association(): llm_client = get_llm_client() - system_prompt = system_prompt = render_prompt("edge_association_prompt.txt", {}) + system_prompt = render_prompt("edge_association_prompt.txt", {}) associative_edges = await llm_client.acreate_structured_output( subgraph_description, system_prompt, AssociativeEdges )