Merge branch 'dev' into feature/cog-3160-redis-session-conversation
This commit is contained in:
commit
aad6478fa8
2 changed files with 99 additions and 48 deletions
58
.github/workflows/e2e_tests.yml
vendored
58
.github/workflows/e2e_tests.yml
vendored
|
|
@ -1,6 +1,4 @@
|
||||||
name: Reusable Integration Tests
|
name: Reusable Integration Tests
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
on:
|
on:
|
||||||
workflow_call:
|
workflow_call:
|
||||||
|
|
@ -267,8 +265,6 @@ jobs:
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
run: uv run python ./cognee/tests/test_edge_ingestion.py
|
run: uv run python ./cognee/tests/test_edge_ingestion.py
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
run_concurrent_subprocess_access_test:
|
run_concurrent_subprocess_access_test:
|
||||||
name: Concurrent Subprocess access test
|
name: Concurrent Subprocess access test
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
@ -332,50 +328,24 @@ jobs:
|
||||||
DB_PASSWORD: cognee
|
DB_PASSWORD: cognee
|
||||||
run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py
|
run: uv run python ./cognee/tests/test_concurrent_subprocess_access.py
|
||||||
|
|
||||||
run_conversation_sessions_test:
|
test-entity-extraction:
|
||||||
name: Conversation sessions test
|
name: Test Entity Extraction
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-22.04
|
||||||
defaults:
|
|
||||||
run:
|
|
||||||
shell: bash
|
|
||||||
services:
|
|
||||||
postgres:
|
|
||||||
image: pgvector/pgvector:pg17
|
|
||||||
env:
|
|
||||||
POSTGRES_USER: cognee
|
|
||||||
POSTGRES_PASSWORD: cognee
|
|
||||||
POSTGRES_DB: cognee_db
|
|
||||||
options: >-
|
|
||||||
--health-cmd pg_isready
|
|
||||||
--health-interval 10s
|
|
||||||
--health-timeout 5s
|
|
||||||
--health-retries 5
|
|
||||||
ports:
|
|
||||||
- 5432:5432
|
|
||||||
|
|
||||||
redis:
|
|
||||||
image: redis:7
|
|
||||||
ports:
|
|
||||||
- 6379:6379
|
|
||||||
options: >-
|
|
||||||
--health-cmd "redis-cli ping"
|
|
||||||
--health-interval 5s
|
|
||||||
--health-timeout 3s
|
|
||||||
--health-retries 5
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Check out repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Cognee Setup
|
- name: Cognee Setup
|
||||||
uses: ./.github/actions/cognee_setup
|
uses: ./.github/actions/cognee_setup
|
||||||
with:
|
with:
|
||||||
python-version: '3.11.x'
|
python-version: '3.11.x'
|
||||||
extra-dependencies: "postgres redis"
|
|
||||||
|
|
||||||
- name: Run Conversation session tests
|
- name: Dependencies already installed
|
||||||
|
run: echo "Dependencies already installed in setup"
|
||||||
|
|
||||||
|
- name: Run Entity Extraction Test
|
||||||
env:
|
env:
|
||||||
ENV: dev
|
ENV: 'dev'
|
||||||
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
LLM_MODEL: ${{ secrets.LLM_MODEL }}
|
||||||
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
|
||||||
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
|
||||||
|
|
@ -384,12 +354,4 @@ jobs:
|
||||||
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
|
||||||
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
|
||||||
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
|
||||||
GRAPH_DATABASE_PROVIDER: 'kuzu'
|
run: uv run python ./cognee/tests/tasks/entity_extraction/entity_extraction_test.py
|
||||||
CACHING: true
|
|
||||||
DB_PROVIDER: 'postgres'
|
|
||||||
DB_NAME: 'cognee_db'
|
|
||||||
DB_HOST: '127.0.0.1'
|
|
||||||
DB_PORT: 5432
|
|
||||||
DB_USERNAME: cognee
|
|
||||||
DB_PASSWORD: cognee
|
|
||||||
run: uv run python ./cognee/tests/test_conversation_history.py
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,89 @@
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
import cognee
|
||||||
|
import cognee.modules.ingestion as ingestion
|
||||||
|
from cognee.infrastructure.llm import get_max_chunk_tokens
|
||||||
|
from cognee.infrastructure.llm.extraction import extract_content_graph
|
||||||
|
from cognee.modules.chunking.TextChunker import TextChunker
|
||||||
|
from cognee.modules.data.processing.document_types import TextDocument
|
||||||
|
from cognee.modules.users.methods import get_default_user
|
||||||
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
|
from cognee.tasks.documents import extract_chunks_from_documents
|
||||||
|
from cognee.tasks.ingestion import save_data_item_to_storage
|
||||||
|
from cognee.infrastructure.files.utils.open_data_file import open_data_file
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_graphs(document_chunks, expected_terms=("qubit", "algorithm", "superposition")):
    """
    Extract a graph from every chunk and check if the expected entities are present.

    Args:
        document_chunks: Iterable of chunk objects; each one must expose a ``text``
            attribute, which is what gets sent to the graph extraction.
        expected_terms: Substrings that must each occur (case-insensitively) in the
            name of at least one extracted node. Defaults to the three terms this
            test originally hard-coded.

    Returns:
        True when every expected term is found in some node name across all
        extracted graphs (vacuously True when there are no terms), otherwise False.
    """
    # One extraction call per chunk, awaited concurrently.
    extraction_results = await asyncio.gather(
        *[extract_content_graph(chunk.text, KnowledgeGraph) for chunk in document_chunks]
    )

    # A term counts as found if it appears in any node of any chunk's graph;
    # the nodes are pooled across chunks rather than checked chunk by chunk.
    return all(
        any(
            term.lower() in node.name.lower()
            for extraction_result in extraction_results
            for node in extraction_result.nodes
        )
        for term in expected_terms
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """
    Test how well the entity extraction works. Repeat graph generation a few times.
    If 80% or more graphs are correctly generated, the test passes.

    Raises:
        AssertionError: When fewer than 80% of the repetitions yield a graph that
            contains all expected entities.
    """

    file_path = os.path.join(
        pathlib.Path(__file__).parent.parent.parent, "test_data/Quantum_computers.txt"
    )

    # NOTE(review): presumably clears data and system metadata left over from
    # earlier runs so the test starts from a clean state - confirm against cognee.prune.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    await cognee.add("NLP is a subfield of computer science.")

    original_file_path = await save_data_item_to_storage(file_path)

    async with open_data_file(original_file_path) as file:
        classified_data = ingestion.classify(file)

        # data_id is the hash of original file contents + owner id to avoid duplicate data
        # Kept inside the ``async with`` so the file is still open in case
        # identification needs to read it - TODO confirm.
        data_id = ingestion.identify(classified_data, await get_default_user())

    await cognee.add(file_path)

    text_document = TextDocument(
        id=data_id,
        type="text",
        mime_type="text/plain",
        name="quantum_text",
        raw_data_location=file_path,
        external_metadata=None,
    )

    # Drain the async chunk generator into a list so the same chunks can be
    # reused for every repetition below.
    document_chunks = [
        chunk
        async for chunk in extract_chunks_from_documents(
            [text_document], max_chunk_size=get_max_chunk_tokens(), chunker=TextChunker
        )
    ]

    number_of_reps = 5

    # Run the repetitions concurrently; each entry is the boolean verdict of one run.
    graph_results = await asyncio.gather(
        *[extract_graphs(document_chunks) for _ in range(number_of_reps)]
    )

    correct_graphs = [result for result in graph_results if result]

    assert len(correct_graphs) >= 0.8 * number_of_reps, (
        f"Only {len(correct_graphs)} of {number_of_reps} extracted graphs "
        "contained all expected entities"
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: drive the async test to completion on a fresh event loop.
    asyncio.run(main())
|
||||||
Loading…
Add table
Reference in a new issue