From fc7a91d99178c0b166e7d6c4e7a29e08040cceca Mon Sep 17 00:00:00 2001
From: EricXiao <7250816+EricXiao95@users.noreply.github.com>
Date: Sat, 2 Aug 2025 22:30:08 +0800
Subject: [PATCH] feature: implement FEELING_LUCKY search type (#1178)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->
This PR implements the 'FEELING_LUCKY' search type, which intelligently
routes user queries to the most appropriate search retriever, addressing
[#1162](https://github.com/topoteretes/cognee/issues/1162).

- Implement the new FEELING_LUCKY search type
- Add the select_search_type function to analyze queries and choose the
proper search type
- Integrate with an LLM for intelligent search type determination
- Add logging for the search type selection process
- Support fallback to RAG_COMPLETION when the LLM selection fails
- Add tests for the new search type

## How it works
When a user selects the 'FEELING_LUCKY' search type, the system first
sends their natural language query to an LLM-based classifier. This
classifier analyzes the query's intent (e.g., is it asking for a
relationship, a summary, or a factual answer?) and selects the optimal
SearchType, such as 'INSIGHTS' or 'GRAPH_COMPLETION'. The main search
function then proceeds using this dynamically selected type. If the
classification process fails, it gracefully falls back to the default
'RAG_COMPLETION' type.

## Testing
Tests can be run with:
```bash
python -m pytest cognee/tests/unit/modules/search/search_methods_test.py -k "feeling_lucky" -v
```

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

Signed-off-by: EricXiao <taoiaox@gmail.com>
---
 cognee/api/v1/search/search.py                |  10 ++
 .../prompts/search_type_selector_prompt.txt   | 130 ++++++++++++++++++
 cognee/modules/search/methods/search.py       |   6 +-
 cognee/modules/search/operations/__init__.py  |   1 +
 .../search/operations/select_search_type.py   |  43 ++++++
 cognee/modules/search/types/SearchType.py     |   1 +
 .../modules/search/search_methods_test.py     |  55 ++++++++
 7 files changed, 245 insertions(+), 1 deletion(-)
 create mode 100644 cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt
 create mode 100644 cognee/modules/search/operations/select_search_type.py

diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py
index eb245f545..66ce48cc2 100644
--- a/cognee/api/v1/search/search.py
+++ b/cognee/api/v1/search/search.py
@@ -71,6 +71,12 @@ async def search(
             Best for: Advanced users, specific graph traversals, debugging.
             Returns: Raw graph query results.
 
+        **FEELING_LUCKY**:
+            Intelligently selects and runs the most appropriate search type.
+            Best for: General-purpose queries or when you're unsure which search type is best.
+            Returns: The results from the automatically selected search type.
+
+
     Args:
         query_text: Your question or search query in natural language.
             Examples:
@@ -119,6 +125,9 @@ async def search(
             **CODE**:
                 [List of structured code information with context]
 
+            **FEELING_LUCKY**:
+                [List of results in the format of the search type that is automatically selected]
+
 
 
 
@@ -130,6 +139,7 @@ async def search(
         - **CHUNKS**: Fastest, pure vector similarity search without LLM
         - **SUMMARIES**: Fast, returns pre-computed summaries
         - **CODE**: Medium speed, specialized for code understanding
+        - **FEELING_LUCKY**: Variable speed, an LLM first picks the search type, then runs it
         - **top_k**: Start with 10, increase for comprehensive analysis (max 100)
         - **datasets**: Specify datasets to improve speed and relevance
 
diff --git a/cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt b/cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt
new file mode 100644
index 000000000..7ed2e72fc
--- /dev/null
+++ b/cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt
@@ -0,0 +1,130 @@
+You are an expert query analyzer for a **GraphRAG system**. Your primary goal is to analyze a user's query and select the single most appropriate `SearchType` tool to answer it.
+
+Here are the available `SearchType` tools and their specific functions:
+
+* **`SUMMARIES`**: The `SUMMARIES` search type retrieves summarized information from the knowledge graph.
+
+  **Best for:** 
+
+  - Getting concise overviews of topics 
+  - Summarizing large amounts of information 
+  - Quick understanding of complex subjects
+
+* **`INSIGHTS`**: The `INSIGHTS` search type discovers connections and relationships between entities in the knowledge graph.
+
+  **Best for:**
+
+  - Discovering how entities are connected
+  - Understanding relationships between concepts
+  - Exploring the structure of your knowledge graph
+
+* **`CHUNKS`**: The `CHUNKS` search type retrieves specific facts and information chunks from the knowledge graph. 
+
+  **Best for:**
+
+  - Finding specific facts
+  - Getting direct answers to questions
+  - Retrieving precise information
+
+* **`RAG_COMPLETION`**: The `RAG_COMPLETION` search type generates an answer from text passages retrieved from the documents. It does not use the graph's relationship structure.
+
+  **Best for:**
+
+  - Getting detailed explanations or comprehensive answers
+  - Combining multiple pieces of information
+  - Getting a single, coherent answer that is generated from relevant text passages
+
+* **`GRAPH_COMPLETION`**: The `GRAPH_COMPLETION` search type leverages the graph structure to provide more contextually aware completions.
+
+  **Best for:**
+
+  - Complex queries requiring graph traversal
+  - Questions that benefit from understanding relationships
+  - Queries where context from connected entities matters
+
+* **`GRAPH_SUMMARY_COMPLETION`**: The `GRAPH_SUMMARY_COMPLETION` search type combines graph traversal with summarization to provide concise but comprehensive answers.
+
+  **Best for:**
+
+  - Getting summarized information that requires understanding relationships
+  - Complex topics that need concise explanations
+  - Queries that benefit from both graph structure and summarization
+
+* **`GRAPH_COMPLETION_COT`**: The `GRAPH_COMPLETION_COT` search type combines graph traversal with chain of thought to provide answers to complex multi hop questions.
+
+  **Best for:**
+
+  - Multi-hop questions that require following several linked concepts or entities
+  - Tracing relational paths in a knowledge graph while also getting clear step-by-step reasoning
+  - Summarizing complex linkages into a concise, human-readable answer once all hops have been explored
+
+* **`GRAPH_COMPLETION_CONTEXT_EXTENSION`**: The `GRAPH_COMPLETION_CONTEXT_EXTENSION` search type combines graph traversal with multi-round context extension.
+
+  **Best for:**
+
+  - Iterative, multi-hop queries where intermediate facts aren’t all present upfront
+  - Complex linkages that benefit from multi-round “search → extend context → reason” loops to uncover deep connections.
+  - Sparse or evolving graphs that require on-the-fly expansion—issuing follow-up searches to discover missing nodes or properties
+
+* **`CODE`**: The `CODE` search type is specialized for retrieving and understanding code-related information from the knowledge graph.
+
+  **Best for:**
+
+  - Code-related queries
+  - Programming examples and patterns
+  - Technical documentation searches
+
+* **`CYPHER`**: The `CYPHER` search type allows users to execute raw Cypher queries directly against the graph database.
+
+  **Best for:**
+
+  - Executing precise graph queries with full control
+  - Leveraging Cypher features and functions
+  - Getting raw data directly from the graph database
+
+* **`NATURAL_LANGUAGE`**: The `NATURAL_LANGUAGE` search type translates a natural language question into a precise Cypher query that is executed directly against the graph database.
+
+  **Best for:**
+
+  - Getting precise, structured answers from the graph using natural language.
+  - Performing advanced graph operations like filtering and aggregating data using natural language.
+  - Asking precise, database-style questions without needing to write Cypher.
+
+**Examples:**
+
+Query: "Summarize the key findings from these research papers"
+Response: `SUMMARIES`
+
+Query: "What is the relationship between the methodologies used in these papers?"
+Response: `INSIGHTS`
+
+Query: "When was Einstein born?"
+Response: `CHUNKS`
+
+Query: "Explain Einstein's contributions to physics"
+Response: `RAG_COMPLETION`
+
+Query: "Provide a comprehensive analysis of how these papers contribute to the field"
+Response: `GRAPH_COMPLETION`
+
+Query: "Explain the overall architecture of this codebase"
+Response: `GRAPH_SUMMARY_COMPLETION`
+
+Query: "Who was the father of the person who invented the lightbulb"
+Response: `GRAPH_COMPLETION_COT`
+
+Query: "What county was XY born in"
+Response: `GRAPH_COMPLETION_CONTEXT_EXTENSION`
+
+Query: "How to implement authentication in this codebase"
+Response: `CODE`
+
+Query: "MATCH (n) RETURN labels(n) as types, n.name as name LIMIT 10"
+Response: `CYPHER`
+
+Query: "Get all nodes connected to John"
+Response: `NATURAL_LANGUAGE`
+
+
+
+Your response MUST be a single word, consisting of only the chosen `SearchType` name, without backticks or quotes. Do not provide any explanation.
\ No newline at end of file
diff --git a/cognee/modules/search/methods/search.py b/cognee/modules/search/methods/search.py
index 1eff23c4a..365920019 100644
--- a/cognee/modules/search/methods/search.py
+++ b/cognee/modules/search/methods/search.py
@@ -27,7 +27,7 @@ from cognee.modules.users.models import User
 from cognee.modules.data.models import Dataset
 from cognee.shared.utils import send_telemetry
 from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
-from cognee.modules.search.operations import log_query, log_result
+from cognee.modules.search.operations import log_query, log_result, select_search_type
 
 
 async def search(
@@ -129,6 +129,10 @@ async def specific_search(
         SearchType.NATURAL_LANGUAGE: NaturalLanguageRetriever().get_completion,
     }
 
+    # If the query type is FEELING_LUCKY, select the search type intelligently
+    if query_type is SearchType.FEELING_LUCKY:
+        query_type = await select_search_type(query)
+
     search_task = search_tasks.get(query_type)
 
     if search_task is None:
diff --git a/cognee/modules/search/operations/__init__.py b/cognee/modules/search/operations/__init__.py
index 41d2a4e4a..b2f9567fb 100644
--- a/cognee/modules/search/operations/__init__.py
+++ b/cognee/modules/search/operations/__init__.py
@@ -1,3 +1,4 @@
 from .log_query import log_query
 from .log_result import log_result
 from .get_history import get_history
+from .select_search_type import select_search_type
diff --git a/cognee/modules/search/operations/select_search_type.py b/cognee/modules/search/operations/select_search_type.py
new file mode 100644
index 000000000..d08074d0d
--- /dev/null
+++ b/cognee/modules/search/operations/select_search_type.py
@@ -0,0 +1,43 @@
+from cognee.infrastructure.llm.get_llm_client import get_llm_client
+from cognee.infrastructure.llm.prompts import read_query_prompt
+from cognee.modules.search.types import SearchType
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger("SearchTypeSelector")
+
+
+async def select_search_type(
+    query: str,
+    system_prompt_path: str = "search_type_selector_prompt.txt",
+) -> SearchType:
+    """
+    Ask the LLM which search type best fits the query.
+
+    Args:
+        query: The query to analyze.
+        system_prompt_path: The path to the system prompt.
+
+    Returns:
+        The search type named by the LLM, or RAG_COMPLETION when the selection
+        fails or the answer is not a runnable search type.
+    """
+    default_search_type = SearchType.RAG_COMPLETION
+    try:
+        # Prompt loading and client creation stay inside the try so any failure falls back.
+        response = await get_llm_client().acreate_structured_output(
+            text_input=query,
+            system_prompt=read_query_prompt(system_prompt_path),
+            response_model=str,
+        )
+        # The prompt's examples wrap names in backticks, so strip that decoration.
+        type_name = response.strip(" \t\r\n`'\"").upper()
+
+        # FEELING_LUCKY has no retriever of its own, so it is never a valid selection.
+        if type_name in SearchType.__members__ and type_name != SearchType.FEELING_LUCKY.name:
+            logger.info(f"Selected lucky search type: {type_name}")
+            return SearchType[type_name]
+
+        logger.info(f"LLM gives an invalid search type: {type_name}")
+        return default_search_type
+    except Exception as e:
+        logger.error(f"Failed to select search type intelligently from LLM: {str(e)}")
+        return default_search_type
diff --git a/cognee/modules/search/types/SearchType.py b/cognee/modules/search/types/SearchType.py
index 1c672f0f0..8248117e7 100644
--- a/cognee/modules/search/types/SearchType.py
+++ b/cognee/modules/search/types/SearchType.py
@@ -13,3 +13,4 @@ class SearchType(Enum):
     NATURAL_LANGUAGE = "NATURAL_LANGUAGE"
     GRAPH_COMPLETION_COT = "GRAPH_COMPLETION_COT"
     GRAPH_COMPLETION_CONTEXT_EXTENSION = "GRAPH_COMPLETION_CONTEXT_EXTENSION"
+    FEELING_LUCKY = "FEELING_LUCKY"
diff --git a/cognee/tests/unit/modules/search/search_methods_test.py b/cognee/tests/unit/modules/search/search_methods_test.py
index bec362144..14712f6d2 100644
--- a/cognee/tests/unit/modules/search/search_methods_test.py
+++ b/cognee/tests/unit/modules/search/search_methods_test.py
@@ -155,6 +155,61 @@ async def test_specific_search_chunks(mock_send_telemetry, mock_chunks_retriever
     assert results[0]["content"] == "Chunk result"
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "selected_type, retriever_name, expected_content, top_k",
+    [
+        (SearchType.RAG_COMPLETION, "CompletionRetriever", "RAG result from lucky search", 10),
+        (SearchType.CHUNKS, "ChunksRetriever", "Chunk result from lucky search", 5),
+        (SearchType.SUMMARIES, "SummariesRetriever", "Summary from lucky search", 15),
+        (SearchType.INSIGHTS, "InsightsRetriever", "Insight result from lucky search", 20),
+    ],
+)
+@patch.object(search_module, "select_search_type")
+@patch.object(search_module, "send_telemetry")
+async def test_specific_search_feeling_lucky(
+    mock_send_telemetry,
+    mock_select_search_type,
+    selected_type,
+    retriever_name,
+    expected_content,
+    top_k,
+    mock_user,
+):
+    with patch.object(search_module, retriever_name) as mock_retriever_class:
+        # Setup: ask for FEELING_LUCKY so specific_search has to resolve a concrete type
+        query = f"test query for {retriever_name}"
+        query_type = SearchType.FEELING_LUCKY
+
+        # Stub the search type selector so it returns the parametrized type
+        mock_select_search_type.return_value = selected_type
+
+        # Stub the patched retriever class; its instance returns one canned result
+        mock_retriever_instance = MagicMock()
+        mock_retriever_instance.get_completion = AsyncMock(
+            return_value=[{"content": expected_content}]
+        )
+        mock_retriever_class.return_value = mock_retriever_instance
+
+        # Execute the search through the FEELING_LUCKY entry point
+        results = await specific_search(query_type, query, mock_user, top_k=top_k)
+
+        # Verify the selector call, the retriever construction, and the returned results
+        mock_select_search_type.assert_called_once_with(query)
+
+        if retriever_name == "CompletionRetriever":
+            mock_retriever_class.assert_called_once_with(
+                system_prompt_path="answer_simple_question.txt", top_k=top_k
+            )
+        else:
+            mock_retriever_class.assert_called_once_with(top_k=top_k)
+
+        mock_retriever_instance.get_completion.assert_called_once_with(query)
+        mock_send_telemetry.assert_called()
+        assert len(results) == 1
+        assert results[0]["content"] == expected_content
+
+
 @pytest.mark.asyncio
 async def test_specific_search_invalid_type(mock_user):
     # Setup