refactor: Refactor search so graph completion is used by default (#505)


## Description
Refactor search so the query type doesn't need to be provided, making it simpler for new users.
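
For a quick sense of the user-facing effect, here is a minimal before/after sketch adapted from the repo's `simple_example.py`; it assumes cognee is installed and an LLM API key is configured, and is only an illustration of the call shapes changed in this PR:

```python
import asyncio

import cognee
from cognee.api.v1.search import SearchType


async def main():
    await cognee.add("Natural language processing (NLP) is a field of AI.")
    await cognee.cognify()

    # Before this change the search type was the first positional argument:
    #   search_results = await cognee.search(SearchType.INSIGHTS, query_text="Tell me about NLP")

    # Now query_type is optional and defaults to SearchType.GRAPH_COMPLETION:
    search_results = await cognee.search(query_text="Tell me about NLP")

    # Other search types are still available via an explicit keyword argument:
    insights = await cognee.search(query_type=SearchType.INSIGHTS, query_text="Tell me about NLP")

    print(search_results)
    print(insights)


if __name__ == "__main__":
    asyncio.run(main())
```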

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai -->
## Summary by CodeRabbit

- **Refactor**
  - Improved the search interface by standardizing parameter usage with explicit keyword arguments for specifying search types, enhancing clarity and consistency.
- **Tests**
  - Updated test cases and example integrations to align with the revised search parameters, ensuring consistent behavior and reliable validation of search outcomes.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
Commit 5fe7ff9883 (parent 8396fed9a1), authored by Igor Ilic on 2025-02-07 17:16:34 +01:00 and committed by GitHub.
23 changed files with 119 additions and 70 deletions

View file

@@ -16,13 +16,7 @@ jobs:
     with:
       example-location: ./examples/python/dynamic_steps_example.py
     secrets:
-      LLM_MODEL: ${{ secrets.LLM_MODEL }}
-      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}

View file

@@ -9,20 +9,51 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
RUNTIME__LOG_LEVEL: ERROR
jobs:
run_notebook_test:
uses: ./.github/workflows/reusable_notebook.yml
with:
notebook-location: notebooks/llama_index_cognee_integration.ipynb
secrets:
#LLM_MODEL: ${{ secrets.LLM_MODEL }}
#LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
#LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
name: test
runs-on: ubuntu-22.04
defaults:
run:
shell: bash
steps:
- name: Check out
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.11.x'
- name: Install Poetry
uses: snok/install-poetry@v1.4.1
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Install dependencies
run: |
pip install jupyter
pip install llama-index-graph-rag-cognee==0.1.2
- name: Execute Jupyter Notebook
env:
ENV: 'dev'
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
run: |
poetry run jupyter nbconvert \
--to notebook \
--execute notebooks/llama_index_cognee_integration.ipynb \
--output executed_notebook.ipynb \
--ExecutePreprocessor.timeout=1200

View file

@@ -16,7 +16,7 @@ jobs:
     with:
       example-location: ./examples/python/multimedia_example.py
     secrets:
-      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }} # Use OpenAI until we deploy models to handle multimedia
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}

View file

@@ -16,13 +16,7 @@ jobs:
     with:
       example-location: ./examples/python/simple_example.py
     secrets:
-      LLM_MODEL: ${{ secrets.LLM_MODEL }}
-      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
-      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
-      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
-      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
-      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
-      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
-      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+      LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
       GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}

View file

@@ -12,7 +12,7 @@ We build for developers who need a reliable, production-ready data layer for AI
 ## What is cognee?
-Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
+Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
 Cognee merges graph and vector databases to uncover hidden relationships and new patterns in your data. You can automatically model, load and retrieve entities and objects representing your business domain and analyze their relationships, uncovering insights that neither vector stores nor graph stores alone can provide. Learn more about use-cases [here](https://docs.cognee.ai/use_cases)
@@ -170,7 +170,7 @@ async def main():
     print(f"Searching cognee for insights with query: '{query_text}'")

     # Query cognee for insights on the added text
     search_results = await cognee.search(
-        SearchType.INSIGHTS, query_text=query_text
+        query_text=query_text, query_type=SearchType.INSIGHTS
     )
     print("Search results:")

View file

@@ -8,8 +8,8 @@ from cognee.modules.search.methods import search as search_function
 async def search(
-    query_type: SearchType,
     query_text: str,
+    query_type: SearchType = SearchType.GRAPH_COMPLETION,
     user: User = None,
     datasets: Union[list[str], str, None] = None,
 ) -> list:
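
Since `query_type` now follows `query_text` and carries a default, old positional calls such as `search(SearchType.INSIGHTS, query_text=...)` would pass the enum as `query_text` and fail, which is why every call site in this PR switches to explicit keyword arguments. A minimal sketch of the resulting call shapes, assuming the import path shown elsewhere in this diff:

```python
from cognee.api.v1.search import SearchType, search


async def run_queries(query_text: str):
    # Omitting query_type now runs a graph completion search by default.
    default_results = await search(query_text=query_text)

    # Any other search type is selected with an explicit keyword argument;
    # user and datasets keep their defaults from the signature above.
    insight_results = await search(query_text=query_text, query_type=SearchType.INSIGHTS)

    return default_results, insight_results
```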

View file

@@ -50,19 +50,23 @@ async def main():
     random_node = (await vector_engine.search("entity.name", "AI"))[0]
     random_node_name = random_node.payload["text"]

-    search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.INSIGHTS, query_text=random_node_name
+    )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name)
+    search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted chunks are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
     assert len(search_results) != 0, "Query related summaries don't exist."
     print("\nExtracted summaries are:\n")
     for result in search_results:
View file

@@ -47,19 +47,23 @@ async def main():
     random_node = (await vector_engine.search("entity_name", "AI"))[0]
     random_node_name = random_node.payload["text"]

-    search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.INSIGHTS, query_text=random_node_name
+    )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name)
+    search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted chunks are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
     assert len(search_results) != 0, "Query related summaries don't exist."
     print("\nExtracted summaries are:\n")
     for result in search_results:

View file

@@ -58,19 +58,23 @@ async def main():
     random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0]
     random_node_name = random_node.payload["text"]

-    search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.INSIGHTS, query_text=random_node_name
+    )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted INSIGHTS are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name)
+    search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted CHUNKS are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
     assert len(search_results) != 0, "The search results list is empty."
     print("\nExtracted SUMMARIES are:\n")
     for result in search_results:

View file

@@ -51,19 +51,23 @@ async def main():
     random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0]
     random_node_name = random_node.payload["text"]

-    search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.INSIGHTS, query_text=random_node_name
+    )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name)
+    search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted chunks are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
     assert len(search_results) != 0, "Query related summaries don't exist."
     print("\nExtracted summaries are:\n")
     for result in search_results:

View file

@@ -126,21 +126,25 @@ async def main():
     random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0]
     random_node_name = random_node.payload["text"]

-    search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.INSIGHTS, query_text=random_node_name
+    )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
     for result in search_results:
         print(f"{result}\n")

     search_results = await cognee.search(
-        SearchType.CHUNKS, query_text=random_node_name, datasets=[dataset_name_2]
+        query_type=SearchType.CHUNKS, query_text=random_node_name, datasets=[dataset_name_2]
     )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted chunks are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
     assert len(search_results) != 0, "Query related summaries don't exist."
     print("\n\nExtracted summaries are:\n")
     for result in search_results:

View file

@@ -51,19 +51,23 @@ async def main():
     random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0]
     random_node_name = random_node.payload["text"]

-    search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.INSIGHTS, query_text=random_node_name
+    )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name)
+    search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted chunks are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
     assert len(search_results) != 0, "Query related summaries don't exist."
     print("\nExtracted summaries are:\n")
     for result in search_results:

View file

@@ -51,19 +51,23 @@ async def main():
     random_node = (await vector_engine.search("entity_name", "Quantum computer"))[0]
     random_node_name = random_node.payload["text"]

-    search_results = await cognee.search(SearchType.INSIGHTS, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_text=random_node_name, query_type=SearchType.INSIGHTS
+    )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.CHUNKS, query_text=random_node_name)
+    search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=random_node_name)
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted chunks are:\n")
     for result in search_results:
         print(f"{result}\n")

-    search_results = await cognee.search(SearchType.SUMMARIES, query_text=random_node_name)
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
     assert len(search_results) != 0, "Query related summaries don't exist."
     print("\nExtracted summaries are:\n")
     for result in search_results:

View file

@@ -76,7 +76,9 @@ async def get_context_with_cognee(
     search_results = []
     for search_type in search_types:
-        raw_search_results = await cognee.search(search_type, query_text=instance["question"])
+        raw_search_results = await cognee.search(
+            query_type=search_type, query_text=instance["question"]
+        )

         if search_type == SearchType.INSIGHTS:
             res_list = [_insight_to_string(edge) for edge in raw_search_results]

View file

@@ -94,7 +94,7 @@ async def cognify_search_base_rag(content: str, context: str):
 async def cognify_search_graph(content: str, context: str):
     from cognee.api.v1.search import search, SearchType

-    results = await search(SearchType.INSIGHTS, query_text="Donald Trump")
+    results = await search(query_type=SearchType.INSIGHTS, query_text="Donald Trump")
     print("results", results)
     return results

View file

@@ -186,7 +186,7 @@ async def main(enable_steps):
     # Step 4: Query insights
     if enable_steps.get("retriever"):
         search_results = await cognee.search(
-            SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"
+            query_type=SearchType.GRAPH_COMPLETION, query_text="Who has experience in design tools?"
         )
         print(search_results)

View file

@@ -37,7 +37,7 @@ async def main():
     # Query cognee for summaries of the data in the multimedia files
     search_results = await cognee.search(
-        SearchType.SUMMARIES,
+        query_type=SearchType.SUMMARIES,
         query_text="What is in the multimedia files?",
     )

View file

@@ -51,7 +51,7 @@ async def main():
     query_text = "Tell me about NLP"
     print(f"Searching cognee for insights with query: '{query_text}'")
     # Query cognee for insights on the added text
-    search_results = await cognee.search(SearchType.INSIGHTS, query_text=query_text)
+    search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text=query_text)
     print("Search results:")
     # Display results

View file

@@ -27,7 +27,7 @@ async def entry(text: str, query: str):
     await cognee.prune.prune_system(metadata=True)
     await cognee.add(text)
     await cognee.cognify()
-    search_results = await cognee.search(SearchType.GRAPH_COMPLETION, query_text=query)
+    search_results = await cognee.search(query_type=SearchType.GRAPH_COMPLETION, query_text=query)
     return {
         "text": text,

View file

@@ -830,7 +830,7 @@
 "node = (await vector_engine.search(\"entity_name\", \"sarah.nguyen@example.com\"))[0]\n",
 "node_name = node.payload[\"text\"]\n",
 "\n",
-"search_results = await cognee.search(SearchType.SUMMARIES, query_text = node_name)\n",
+"search_results = await cognee.search(query_type=SearchType.SUMMARIES, query_text = node_name)\n",
 "print(\"\\n\\Extracted summaries are:\\n\")\n",
 "for result in search_results:\n",
 " print(f\"{result}\\n\")"
@@ -851,7 +851,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"search_results = await cognee.search(SearchType.CHUNKS, query_text = node_name)\n",
+"search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text = node_name)\n",
 "print(\"\\n\\nExtracted chunks are:\\n\")\n",
 "for result in search_results:\n",
 " print(f\"{result}\\n\")"
@@ -872,7 +872,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"search_results = await cognee.search(SearchType.INSIGHTS, query_text = node_name)\n",
+"search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text = node_name)\n",
 "print(\"\\n\\nExtracted sentences are:\\n\")\n",
 "for result in search_results:\n",
 " print(f\"{result}\\n\")"

View file

@@ -179,7 +179,7 @@
 "\n",
 "# Query cognee for summaries\n",
 "search_results = await cognee.search(\n",
-" SearchType.SUMMARIES, query_text=\"What are the main news discussed in the document?\"\n",
+" query_type=SearchType.SUMMARIES, query_text=\"What are the main news discussed in the document?\"\n",
 ")\n",
 "# Display search results\n",
 "print(\"\\n Summary of main news discussed:\\n\")\n",

View file

@@ -137,7 +137,7 @@
 "\n",
 "# Query cognee for summaries of the data in the multimedia files\n",
 "search_results = await cognee.search(\n",
-" SearchType.SUMMARIES,\n",
+" query_type=SearchType.SUMMARIES,\n",
 " query_text=\"What is in the multimedia files?\",\n",
 ")\n",
 "\n",

View file

@@ -566,7 +566,7 @@
 "node = (await vector_engine.search(\"entity_name\", \"sarah.nguyen@example.com\"))[0]\n",
 "node_name = node.payload[\"text\"]\n",
 "\n",
-"search_results = await cognee.search(SearchType.SUMMARIES, query_text=node_name)\n",
+"search_results = await cognee.search(query_type=SearchType.SUMMARIES, query_text=node_name)\n",
 "print(\"\\n\\Extracted summaries are:\\n\")\n",
 "for result in search_results:\n",
 " print(f\"{result}\\n\")"
@@ -587,7 +587,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"search_results = await cognee.search(SearchType.CHUNKS, query_text=node_name)\n",
+"search_results = await cognee.search(query_type=SearchType.CHUNKS, query_text=node_name)\n",
 "print(\"\\n\\nExtracted chunks are:\\n\")\n",
 "for result in search_results:\n",
 " print(f\"{result}\\n\")"
@@ -608,7 +608,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"search_results = await cognee.search(SearchType.INSIGHTS, query_text=node_name)\n",
+"search_results = await cognee.search(query_type=SearchType.INSIGHTS, query_text=node_name)\n",
 "print(\"\\n\\nExtracted sentences are:\\n\")\n",
 "for result in search_results:\n",
 " print(f\"{result}\\n\")"