COG-3050 - remove insights search (#1506)
<!-- .github/pull_request_template.md --> ## Description <!-- Please provide a clear, human-generated description of the changes in this PR. DO NOT use AI-generated descriptions. We want to understand your thought process and reasoning. --> As per COG-3050: 1. Remove insights search type and clean up any orphaned code 2. Replace callsites with default search type - `GRAPH_COMPLETION` - where applicable ## Type of Change <!-- Please check the relevant option --> - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] New feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [ ] Documentation update - [x] Code refactoring - [ ] Performance improvement - [ ] Other (please specify): ## Screenshots/Videos (if applicable) <!-- Add screenshots or videos to help explain your changes --> ## Pre-submission Checklist <!-- Please check all boxes that apply before submitting your PR --> - [ ] **I have tested my changes thoroughly before submitting this PR** - [ ] **This PR contains minimal changes necessary to address the issue/feature** - [ ] My code follows the project's coding standards and style guidelines - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] I have added necessary documentation (if applicable) - [ ] All new and existing tests pass - [ ] I have searched existing PRs to ensure this change hasn't been submitted already - [ ] I have linked any relevant issues in the description - [ ] My commits have clear and descriptive messages ## DCO Affirmation I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
commit
fa7aa38b8f
36 changed files with 34 additions and 464 deletions
|
|
@ -89,15 +89,6 @@ export default function useChat(dataset: Dataset) {
|
|||
}
|
||||
|
||||
|
||||
interface Node {
|
||||
name: string;
|
||||
}
|
||||
|
||||
interface Relationship {
|
||||
relationship_name: string;
|
||||
}
|
||||
|
||||
type InsightMessage = [Node, Relationship, Node];
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
function convertToSearchTypeOutput(systemMessage: any[] | any, searchType: string): string {
|
||||
|
|
@ -106,14 +97,6 @@ function convertToSearchTypeOutput(systemMessage: any[] | any, searchType: strin
|
|||
}
|
||||
|
||||
switch (searchType) {
|
||||
case "INSIGHTS":
|
||||
return systemMessage.map((message: InsightMessage) => {
|
||||
const [node1, relationship, node2] = message;
|
||||
if (node1.name && node2.name) {
|
||||
return `${node1.name} ${relationship.relationship_name} ${node2.name}.`;
|
||||
}
|
||||
return "";
|
||||
}).join("\n");
|
||||
case "SUMMARIES":
|
||||
return systemMessage.map((message: { text: string }) => message.text).join("\n");
|
||||
case "CHUNKS":
|
||||
|
|
|
|||
|
|
@ -266,7 +266,7 @@ The MCP server exposes its functionality through tools. Call them from any MCP c
|
|||
|
||||
- **codify**: Analyse a code repository, build a code graph, and store it in memory
|
||||
|
||||
- **search**: Query memory – supports GRAPH_COMPLETION, RAG_COMPLETION, CODE, CHUNKS, INSIGHTS
|
||||
- **search**: Query memory – supports GRAPH_COMPLETION, RAG_COMPLETION, CODE, CHUNKS
|
||||
|
||||
- **list_data**: List all datasets and their data items with IDs for deletion operations
|
||||
|
||||
|
|
|
|||
|
|
@ -255,7 +255,7 @@ async def cognify(
|
|||
# 2. Get entity relationships and connections
|
||||
relationships = await cognee.search(
|
||||
"connections between concepts",
|
||||
query_type=SearchType.INSIGHTS
|
||||
query_type=SearchType.GRAPH_COMPLETION
|
||||
)
|
||||
|
||||
# 3. Find relevant document chunks
|
||||
|
|
@ -478,11 +478,6 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
Best for: Direct document retrieval, specific fact-finding.
|
||||
Returns: LLM responses based on relevant text chunks.
|
||||
|
||||
**INSIGHTS**:
|
||||
Structured entity relationships and semantic connections.
|
||||
Best for: Understanding concept relationships, knowledge mapping.
|
||||
Returns: Formatted relationship data and entity connections.
|
||||
|
||||
**CHUNKS**:
|
||||
Raw text segments that match the query semantically.
|
||||
Best for: Finding specific passages, citations, exact content.
|
||||
|
|
@ -524,7 +519,6 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
- "RAG_COMPLETION": Returns an LLM response based on the search query and standard RAG data
|
||||
- "CODE": Returns code-related knowledge in JSON format
|
||||
- "CHUNKS": Returns raw text chunks from the knowledge graph
|
||||
- "INSIGHTS": Returns relationships between nodes in readable format
|
||||
- "SUMMARIES": Returns pre-generated hierarchical summaries
|
||||
- "CYPHER": Direct graph database queries
|
||||
- "FEELING_LUCKY": Automatically selects best search type
|
||||
|
|
@ -537,7 +531,6 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
A list containing a single TextContent object with the search results.
|
||||
The format of the result depends on the search_type:
|
||||
- **GRAPH_COMPLETION/RAG_COMPLETION**: Conversational AI response strings
|
||||
- **INSIGHTS**: Formatted relationship descriptions and entity connections
|
||||
- **CHUNKS**: Relevant text passages with source metadata
|
||||
- **SUMMARIES**: Hierarchical summaries from general to specific
|
||||
- **CODE**: Structured code information with context
|
||||
|
|
@ -547,7 +540,6 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
Performance & Optimization:
|
||||
- **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
|
||||
- **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
|
||||
- **INSIGHTS**: Fast, returns structured relationships without LLM processing
|
||||
- **CHUNKS**: Fastest, pure vector similarity search without LLM
|
||||
- **SUMMARIES**: Fast, returns pre-computed summaries
|
||||
- **CODE**: Medium speed, specialized for code understanding
|
||||
|
|
@ -586,9 +578,6 @@ async def search(search_query: str, search_type: str) -> list:
|
|||
return str(search_results[0])
|
||||
elif search_type.upper() == "CHUNKS":
|
||||
return str(search_results)
|
||||
elif search_type.upper() == "INSIGHTS":
|
||||
results = retrieved_edges_to_string(search_results)
|
||||
return results
|
||||
else:
|
||||
return str(search_results)
|
||||
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ async def cognify(
|
|||
# 2. Get entity relationships and connections
|
||||
relationships = await cognee.search(
|
||||
"connections between concepts",
|
||||
query_type=SearchType.INSIGHTS
|
||||
query_type=SearchType.GRAPH_COMPLETION
|
||||
)
|
||||
|
||||
# 3. Find relevant document chunks
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ DEFAULT_TOOLS = [
|
|||
"type": "string",
|
||||
"description": "Type of search to perform",
|
||||
"enum": [
|
||||
"INSIGHTS",
|
||||
"CODE",
|
||||
"GRAPH_COMPLETION",
|
||||
"NATURAL_LANGUAGE",
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ async def handle_search(arguments: Dict[str, Any], user) -> list:
|
|||
valid_search_types = (
|
||||
search_tool["parameters"]["properties"]["search_type"]["enum"]
|
||||
if search_tool
|
||||
else ["INSIGHTS", "CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"]
|
||||
else ["CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"]
|
||||
)
|
||||
|
||||
if search_type_str not in valid_search_types:
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ DEFAULT_TOOLS = [
|
|||
"type": "string",
|
||||
"description": "Type of search to perform",
|
||||
"enum": [
|
||||
"INSIGHTS",
|
||||
"CODE",
|
||||
"GRAPH_COMPLETION",
|
||||
"NATURAL_LANGUAGE",
|
||||
|
|
|
|||
|
|
@ -52,11 +52,6 @@ async def search(
|
|||
Best for: Direct document retrieval, specific fact-finding.
|
||||
Returns: LLM responses based on relevant text chunks.
|
||||
|
||||
**INSIGHTS**:
|
||||
Structured entity relationships and semantic connections.
|
||||
Best for: Understanding concept relationships, knowledge mapping.
|
||||
Returns: Formatted relationship data and entity connections.
|
||||
|
||||
**CHUNKS**:
|
||||
Raw text segments that match the query semantically.
|
||||
Best for: Finding specific passages, citations, exact content.
|
||||
|
|
@ -124,9 +119,6 @@ async def search(
|
|||
**GRAPH_COMPLETION/RAG_COMPLETION**:
|
||||
[List of conversational AI response strings]
|
||||
|
||||
**INSIGHTS**:
|
||||
[List of formatted relationship descriptions and entity connections]
|
||||
|
||||
**CHUNKS**:
|
||||
[List of relevant text passages with source metadata]
|
||||
|
||||
|
|
@ -146,7 +138,6 @@ async def search(
|
|||
Performance & Optimization:
|
||||
- **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
|
||||
- **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
|
||||
- **INSIGHTS**: Fast, returns structured relationships without LLM processing
|
||||
- **CHUNKS**: Fastest, pure vector similarity search without LLM
|
||||
- **SUMMARIES**: Fast, returns pre-computed summaries
|
||||
- **CODE**: Medium speed, specialized for code understanding
|
||||
|
|
|
|||
|
|
@ -31,10 +31,6 @@ Search Types & Use Cases:
|
|||
Traditional RAG using document chunks without graph structure.
|
||||
Best for: Direct document retrieval, specific fact-finding.
|
||||
|
||||
**INSIGHTS**:
|
||||
Structured entity relationships and semantic connections.
|
||||
Best for: Understanding concept relationships, knowledge mapping.
|
||||
|
||||
**CHUNKS**:
|
||||
Raw text segments that match the query semantically.
|
||||
Best for: Finding specific passages, citations, exact content.
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ COMMAND_DESCRIPTIONS = {
|
|||
SEARCH_TYPE_CHOICES = [
|
||||
"GRAPH_COMPLETION",
|
||||
"RAG_COMPLETION",
|
||||
"INSIGHTS",
|
||||
"CHUNKS",
|
||||
"SUMMARIES",
|
||||
"CODE",
|
||||
|
|
|
|||
|
|
@ -10,8 +10,6 @@ Here are the available `SearchType` tools and their specific functions:
|
|||
- Summarizing large amounts of information
|
||||
- Quick understanding of complex subjects
|
||||
|
||||
* **`INSIGHTS`**: The `INSIGHTS` search type discovers connections and relationships between entities in the knowledge graph.
|
||||
|
||||
**Best for:**
|
||||
|
||||
- Discovering how entities are connected
|
||||
|
|
@ -95,9 +93,6 @@ Here are the available `SearchType` tools and their specific functions:
|
|||
Query: "Summarize the key findings from these research papers"
|
||||
Response: `SUMMARIES`
|
||||
|
||||
Query: "What is the relationship between the methodologies used in these papers?"
|
||||
Response: `INSIGHTS`
|
||||
|
||||
Query: "When was Einstein born?"
|
||||
Response: `CHUNKS`
|
||||
|
||||
|
|
|
|||
|
|
@ -1,133 +0,0 @@
|
|||
import asyncio
|
||||
from typing import Any, Optional
|
||||
|
||||
from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node
|
||||
from cognee.modules.retrieval.base_graph_retriever import BaseGraphRetriever
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||
from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
|
||||
|
||||
logger = get_logger("InsightsRetriever")
|
||||
|
||||
|
||||
class InsightsRetriever(BaseGraphRetriever):
|
||||
"""
|
||||
Retriever for handling graph connection-based insights.
|
||||
|
||||
Public methods include:
|
||||
- get_context
|
||||
- get_completion
|
||||
|
||||
Instance variables include:
|
||||
- exploration_levels
|
||||
- top_k
|
||||
"""
|
||||
|
||||
def __init__(self, exploration_levels: int = 1, top_k: Optional[int] = 5):
|
||||
"""Initialize retriever with exploration levels and search parameters."""
|
||||
self.exploration_levels = exploration_levels
|
||||
self.top_k = top_k
|
||||
|
||||
async def get_context(self, query: str) -> list:
|
||||
"""
|
||||
Find neighbours of a given node in the graph.
|
||||
|
||||
If the provided query does not correspond to an existing node,
|
||||
search for similar entities and retrieve their connections.
|
||||
Reraises NoDataError if there is no data found in the system.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- query (str): A string identifier for the node whose neighbours are to be
|
||||
retrieved.
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- list: A list of unique connections found for the queried node.
|
||||
"""
|
||||
if query is None:
|
||||
return []
|
||||
|
||||
node_id = query
|
||||
graph_engine = await get_graph_engine()
|
||||
exact_node = await graph_engine.extract_node(node_id)
|
||||
|
||||
if exact_node is not None and "id" in exact_node:
|
||||
node_connections = await graph_engine.get_connections(str(exact_node["id"]))
|
||||
else:
|
||||
vector_engine = get_vector_engine()
|
||||
|
||||
try:
|
||||
results = await asyncio.gather(
|
||||
vector_engine.search("Entity_name", query_text=query, limit=self.top_k),
|
||||
vector_engine.search("EntityType_name", query_text=query, limit=self.top_k),
|
||||
)
|
||||
except CollectionNotFoundError as error:
|
||||
logger.error("Entity collections not found")
|
||||
raise NoDataError("No data found in the system, please add data first.") from error
|
||||
|
||||
results = [*results[0], *results[1]]
|
||||
relevant_results = [result for result in results if result.score < 0.5][: self.top_k]
|
||||
|
||||
if len(relevant_results) == 0:
|
||||
return []
|
||||
|
||||
node_connections_results = await asyncio.gather(
|
||||
*[graph_engine.get_connections(result.id) for result in relevant_results]
|
||||
)
|
||||
|
||||
node_connections = []
|
||||
for neighbours in node_connections_results:
|
||||
node_connections.extend(neighbours)
|
||||
|
||||
unique_node_connections_map = {}
|
||||
unique_node_connections = []
|
||||
|
||||
for node_connection in node_connections:
|
||||
if "id" not in node_connection[0] or "id" not in node_connection[2]:
|
||||
continue
|
||||
|
||||
unique_id = f"{node_connection[0]['id']} {node_connection[1]['relationship_name']} {node_connection[2]['id']}"
|
||||
if unique_id not in unique_node_connections_map:
|
||||
unique_node_connections_map[unique_id] = True
|
||||
unique_node_connections.append(node_connection)
|
||||
|
||||
return unique_node_connections
|
||||
# return [
|
||||
# Edge(
|
||||
# node1=Node(node_id=connection[0]["id"], attributes=connection[0]),
|
||||
# node2=Node(node_id=connection[2]["id"], attributes=connection[2]),
|
||||
# attributes={
|
||||
# **connection[1],
|
||||
# "relationship_type": connection[1]["relationship_name"],
|
||||
# },
|
||||
# )
|
||||
# for connection in unique_node_connections
|
||||
# ]
|
||||
|
||||
async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
|
||||
"""
|
||||
Returns the graph connections context.
|
||||
|
||||
If a context is not provided, it fetches the context using the query provided.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
|
||||
- query (str): A string identifier used to fetch the context.
|
||||
- context (Optional[Any]): An optional context to use for the completion; if None,
|
||||
it fetches the context based on the query. (default None)
|
||||
|
||||
Returns:
|
||||
--------
|
||||
|
||||
- Any: The context used for the completion, which is either provided or fetched
|
||||
based on the query.
|
||||
"""
|
||||
if context is None:
|
||||
context = await self.get_context(query)
|
||||
return context
|
||||
|
|
@ -62,7 +62,7 @@ async def code_description_to_code_part(
|
|||
|
||||
try:
|
||||
if include_docs:
|
||||
search_results = await search(query_text=query, query_type="INSIGHTS")
|
||||
search_results = await search(query_text=query, query_type="GRAPH_COMPLETION")
|
||||
|
||||
concatenated_descriptions = " ".join(
|
||||
obj["description"]
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ from cognee.modules.search.exceptions import UnsupportedSearchTypeError
|
|||
# Retrievers
|
||||
from cognee.modules.retrieval.user_qa_feedback import UserQAFeedback
|
||||
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
|
||||
from cognee.modules.retrieval.insights_retriever import InsightsRetriever
|
||||
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
|
||||
from cognee.modules.retrieval.completion_retriever import CompletionRetriever
|
||||
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||
|
|
@ -44,10 +43,6 @@ async def get_search_type_tools(
|
|||
SummariesRetriever(top_k=top_k).get_completion,
|
||||
SummariesRetriever(top_k=top_k).get_context,
|
||||
],
|
||||
SearchType.INSIGHTS: [
|
||||
InsightsRetriever(top_k=top_k).get_completion,
|
||||
InsightsRetriever(top_k=top_k).get_context,
|
||||
],
|
||||
SearchType.CHUNKS: [
|
||||
ChunksRetriever(top_k=top_k).get_completion,
|
||||
ChunksRetriever(top_k=top_k).get_context,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ from enum import Enum
|
|||
|
||||
class SearchType(Enum):
|
||||
SUMMARIES = "SUMMARIES"
|
||||
INSIGHTS = "INSIGHTS"
|
||||
CHUNKS = "CHUNKS"
|
||||
RAG_COMPLETION = "RAG_COMPLETION"
|
||||
GRAPH_COMPLETION = "GRAPH_COMPLETION"
|
||||
|
|
|
|||
|
|
@ -53,7 +53,6 @@ class TestCliConfig:
|
|||
expected_types = [
|
||||
"GRAPH_COMPLETION",
|
||||
"RAG_COMPLETION",
|
||||
"INSIGHTS",
|
||||
"CHUNKS",
|
||||
"SUMMARIES",
|
||||
"CODE",
|
||||
|
|
|
|||
|
|
@ -155,7 +155,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ async def main():
|
|||
random_node_name = random_node.payload["text"]
|
||||
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text=random_node_name
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
|
||||
)
|
||||
assert len(search_results) != 0, "The search results list is empty."
|
||||
print("\n\nExtracted sentences are:\n")
|
||||
|
|
|
|||
|
|
@ -1,251 +0,0 @@
|
|||
import os
|
||||
import pytest
|
||||
import pathlib
|
||||
|
||||
import cognee
|
||||
from cognee.low_level import setup
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.modules.engine.models import Entity, EntityType
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.infrastructure.databases.vector import get_vector_engine
|
||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||
from cognee.modules.retrieval.insights_retriever import InsightsRetriever
|
||||
|
||||
|
||||
class TestInsightsRetriever:
|
||||
@pytest.mark.asyncio
|
||||
async def test_insights_context_simple(self):
|
||||
system_directory_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, ".cognee_system/test_insights_context_simple"
|
||||
)
|
||||
cognee.config.system_root_directory(system_directory_path)
|
||||
data_directory_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, ".data_storage/test_insights_context_simple"
|
||||
)
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
await setup()
|
||||
|
||||
entityTypePerson = EntityType(
|
||||
name="Person",
|
||||
description="An individual",
|
||||
)
|
||||
|
||||
person1 = Entity(
|
||||
name="Steve Rodger",
|
||||
is_a=entityTypePerson,
|
||||
description="An American actor, comedian, and filmmaker",
|
||||
)
|
||||
|
||||
person2 = Entity(
|
||||
name="Mike Broski",
|
||||
is_a=entityTypePerson,
|
||||
description="Financial advisor and philanthropist",
|
||||
)
|
||||
|
||||
person3 = Entity(
|
||||
name="Christina Mayer",
|
||||
is_a=entityTypePerson,
|
||||
description="Maker of next generation of iconic American music videos",
|
||||
)
|
||||
|
||||
entityTypeCompany = EntityType(
|
||||
name="Company",
|
||||
description="An organization that operates on an annual basis",
|
||||
)
|
||||
|
||||
company1 = Entity(
|
||||
name="Apple",
|
||||
is_a=entityTypeCompany,
|
||||
description="An American multinational technology company headquartered in Cupertino, California",
|
||||
)
|
||||
|
||||
company2 = Entity(
|
||||
name="Google",
|
||||
is_a=entityTypeCompany,
|
||||
description="An American multinational technology company that specializes in Internet-related services and products",
|
||||
)
|
||||
|
||||
company3 = Entity(
|
||||
name="Facebook",
|
||||
is_a=entityTypeCompany,
|
||||
description="An American social media, messaging, and online platform",
|
||||
)
|
||||
|
||||
entities = [person1, person2, person3, company1, company2, company3]
|
||||
|
||||
await add_data_points(entities)
|
||||
|
||||
retriever = InsightsRetriever()
|
||||
|
||||
context = await retriever.get_context("Mike")
|
||||
|
||||
assert context[0][0]["name"] == "Mike Broski", "Failed to get Mike Broski"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_insights_context_complex(self):
|
||||
system_directory_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, ".cognee_system/test_insights_context_complex"
|
||||
)
|
||||
cognee.config.system_root_directory(system_directory_path)
|
||||
data_directory_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, ".data_storage/test_insights_context_complex"
|
||||
)
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
await setup()
|
||||
|
||||
entityTypePerson = EntityType(
|
||||
name="Person",
|
||||
description="An individual",
|
||||
)
|
||||
|
||||
person1 = Entity(
|
||||
name="Steve Rodger",
|
||||
is_a=entityTypePerson,
|
||||
description="An American actor, comedian, and filmmaker",
|
||||
)
|
||||
|
||||
person2 = Entity(
|
||||
name="Mike Broski",
|
||||
is_a=entityTypePerson,
|
||||
description="Financial advisor and philanthropist",
|
||||
)
|
||||
|
||||
person3 = Entity(
|
||||
name="Christina Mayer",
|
||||
is_a=entityTypePerson,
|
||||
description="Maker of next generation of iconic American music videos",
|
||||
)
|
||||
|
||||
person4 = Entity(
|
||||
name="Jason Statham",
|
||||
is_a=entityTypePerson,
|
||||
description="An American actor",
|
||||
)
|
||||
|
||||
person5 = Entity(
|
||||
name="Mike Tyson",
|
||||
is_a=entityTypePerson,
|
||||
description="A former professional boxer from the United States",
|
||||
)
|
||||
|
||||
entityTypeCompany = EntityType(
|
||||
name="Company",
|
||||
description="An organization that operates on an annual basis",
|
||||
)
|
||||
|
||||
company1 = Entity(
|
||||
name="Apple",
|
||||
is_a=entityTypeCompany,
|
||||
description="An American multinational technology company headquartered in Cupertino, California",
|
||||
)
|
||||
|
||||
company2 = Entity(
|
||||
name="Google",
|
||||
is_a=entityTypeCompany,
|
||||
description="An American multinational technology company that specializes in Internet-related services and products",
|
||||
)
|
||||
|
||||
company3 = Entity(
|
||||
name="Facebook",
|
||||
is_a=entityTypeCompany,
|
||||
description="An American social media, messaging, and online platform",
|
||||
)
|
||||
|
||||
entities = [person1, person2, person3, company1, company2, company3]
|
||||
|
||||
await add_data_points(entities)
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
|
||||
await graph_engine.add_edges(
|
||||
[
|
||||
(
|
||||
(str)(person1.id),
|
||||
(str)(company1.id),
|
||||
"works_for",
|
||||
dict(
|
||||
relationship_name="works_for",
|
||||
source_node_id=person1.id,
|
||||
target_node_id=company1.id,
|
||||
),
|
||||
),
|
||||
(
|
||||
(str)(person2.id),
|
||||
(str)(company2.id),
|
||||
"works_for",
|
||||
dict(
|
||||
relationship_name="works_for",
|
||||
source_node_id=person2.id,
|
||||
target_node_id=company2.id,
|
||||
),
|
||||
),
|
||||
(
|
||||
(str)(person3.id),
|
||||
(str)(company3.id),
|
||||
"works_for",
|
||||
dict(
|
||||
relationship_name="works_for",
|
||||
source_node_id=person3.id,
|
||||
target_node_id=company3.id,
|
||||
),
|
||||
),
|
||||
(
|
||||
(str)(person4.id),
|
||||
(str)(company1.id),
|
||||
"works_for",
|
||||
dict(
|
||||
relationship_name="works_for",
|
||||
source_node_id=person4.id,
|
||||
target_node_id=company1.id,
|
||||
),
|
||||
),
|
||||
(
|
||||
(str)(person5.id),
|
||||
(str)(company1.id),
|
||||
"works_for",
|
||||
dict(
|
||||
relationship_name="works_for",
|
||||
source_node_id=person5.id,
|
||||
target_node_id=company1.id,
|
||||
),
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
retriever = InsightsRetriever(top_k=20)
|
||||
|
||||
context = await retriever.get_context("Christina")
|
||||
|
||||
assert context[0][0]["name"] == "Christina Mayer", "Failed to get Christina Mayer"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_insights_context_on_empty_graph(self):
|
||||
system_directory_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, ".cognee_system/test_insights_context_on_empty_graph"
|
||||
)
|
||||
cognee.config.system_root_directory(system_directory_path)
|
||||
data_directory_path = os.path.join(
|
||||
pathlib.Path(__file__).parent, ".data_storage/test_insights_context_on_empty_graph"
|
||||
)
|
||||
cognee.config.data_root_directory(data_directory_path)
|
||||
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
retriever = InsightsRetriever()
|
||||
|
||||
with pytest.raises(NoDataError):
|
||||
await retriever.get_context("Christina Mayer")
|
||||
|
||||
vector_engine = get_vector_engine()
|
||||
await vector_engine.create_collection("Entity_name", payload_schema=Entity)
|
||||
await vector_engine.create_collection("EntityType_name", payload_schema=EntityType)
|
||||
|
||||
context = await retriever.get_context("Christina Mayer")
|
||||
assert context == [], "Returned context should be empty on an empty graph"
|
||||
|
|
@ -34,7 +34,7 @@ class CogneeConfig(QABenchmarkConfig):
|
|||
system_prompt_path: str = "answer_simple_question_benchmark2.txt"
|
||||
|
||||
# Search parameters (fallback if not using eval framework)
|
||||
search_type: SearchType = SearchType.INSIGHTS
|
||||
search_type: SearchType = SearchType.GRAPH_COMPLETION
|
||||
|
||||
# Clean slate on initialization
|
||||
clean_start: bool = True
|
||||
|
|
|
|||
|
|
@ -57,7 +57,9 @@ async def main():
|
|||
|
||||
# Now let's perform some searches
|
||||
# 1. Search for insights related to "ChromaDB"
|
||||
insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="ChromaDB")
|
||||
insights_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text="ChromaDB"
|
||||
)
|
||||
print("\nInsights about ChromaDB:")
|
||||
for result in insights_results:
|
||||
print(f"- {result}")
|
||||
|
|
|
|||
|
|
@ -55,7 +55,9 @@ async def main():
|
|||
|
||||
# Now let's perform some searches
|
||||
# 1. Search for insights related to "KuzuDB"
|
||||
insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="KuzuDB")
|
||||
insights_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text="KuzuDB"
|
||||
)
|
||||
print("\nInsights about KuzuDB:")
|
||||
for result in insights_results:
|
||||
print(f"- {result}")
|
||||
|
|
|
|||
|
|
@ -64,7 +64,9 @@ async def main():
|
|||
|
||||
# Now let's perform some searches
|
||||
# 1. Search for insights related to "Neo4j"
|
||||
insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Neo4j")
|
||||
insights_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text="Neo4j"
|
||||
)
|
||||
print("\nInsights about Neo4j:")
|
||||
for result in insights_results:
|
||||
print(f"- {result}")
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ async def main():
|
|||
# Now let's perform some searches
|
||||
# 1. Search for insights related to "Neptune Analytics"
|
||||
insights_results = await cognee.search(
|
||||
query_type=SearchType.INSIGHTS, query_text="Neptune Analytics"
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text="Neptune Analytics"
|
||||
)
|
||||
print("\n========Insights about Neptune Analytics========:")
|
||||
for result in insights_results:
|
||||
|
|
|
|||
|
|
@ -69,7 +69,9 @@ async def main():
|
|||
|
||||
# Now let's perform some searches
|
||||
# 1. Search for insights related to "PGVector"
|
||||
insights_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text="PGVector")
|
||||
insights_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text="PGVector"
|
||||
)
|
||||
print("\nInsights about PGVector:")
|
||||
for result in insights_results:
|
||||
print(f"- {result}")
|
||||
|
|
|
|||
|
|
@ -50,7 +50,9 @@ async def main():
|
|||
query_text = "Tell me about NLP"
|
||||
print(f"Searching cognee for insights with query: '{query_text}'")
|
||||
# Query cognee for insights on the added text
|
||||
search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text=query_text)
|
||||
search_results = await cognee.search(
|
||||
query_type=SearchType.GRAPH_COMPLETION, query_text=query_text
|
||||
)
|
||||
|
||||
print("Search results:")
|
||||
# Display results
|
||||
|
|
|
|||
2
notebooks/cognee_demo.ipynb
vendored
2
notebooks/cognee_demo.ipynb
vendored
|
|
@ -1795,7 +1795,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text=node_name)\n",
|
||||
"search_results = await cognee.search(query_type=SearchType.GRAPH_COMPLETION, query_text=node_name)\n",
|
||||
"print(\"\\n\\nExtracted sentences are:\\n\")\n",
|
||||
"for result in search_results:\n",
|
||||
" print(f\"{result}\\n\")"
|
||||
|
|
|
|||
2
notebooks/neptune-analytics-example.ipynb
vendored
2
notebooks/neptune-analytics-example.ipynb
vendored
|
|
@ -295,7 +295,7 @@
|
|||
"cell_type": "code",
|
||||
"source": [
|
||||
"# Search graph insights\n",
|
||||
"insights_results = await search(query_text=\"Neptune Analytics\", query_type=SearchType.INSIGHTS)\n",
|
||||
"insights_results = await search(query_text=\"Neptune Analytics\", query_type=SearchType.GRAPH_COMPLETION)\n",
|
||||
"print(\"\\nInsights about Neptune Analytics:\")\n",
|
||||
"for result in insights_results:\n",
|
||||
" src_node = result[0].get(\"name\", result[0][\"type\"])\n",
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue