diff --git a/.env.template b/.env.template
index 781e82428..ddcd41a6c 100644
--- a/.env.template
+++ b/.env.template
@@ -116,7 +116,14 @@ VECTOR_DB_PROVIDER="lancedb"
VECTOR_DB_URL=
VECTOR_DB_KEY=
+################################################################################
+# 🧩 Ontology resolver settings
+################################################################################
+# ONTOLOGY_RESOLVER=rdflib # Default: uses rdflib and an OWL file to read ontology structures
+# MATCHING_STRATEGY=fuzzy # Default: fuzzy matching with an 80% similarity threshold
+# ONTOLOGY_FILE_PATH=YOUR_FULL_FILE_PATH # Default: empty
+# To use a custom ontology resolver, either pass one explicitly (as in ontology_example) or set the file path and these settings as envs.
################################################################################
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS
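For reference, a minimal sketch of how these settings are consumed, assuming the `cognee.modules.ontology` helpers introduced later in this diff (the `.owl` path is hypothetical):

```python
import os

from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.modules.ontology.get_default_ontology_resolver import (
    get_default_ontology_resolver,
    get_ontology_resolver_from_env,
)

# Hypothetical values; set these before the first get_ontology_env_config() call,
# since the settings object is cached with lru_cache.
os.environ["ONTOLOGY_RESOLVER"] = "rdflib"
os.environ["MATCHING_STRATEGY"] = "fuzzy"
os.environ["ONTOLOGY_FILE_PATH"] = "/absolute/path/to/ontology.owl"

env_config = get_ontology_env_config()
if env_config.ontology_file_path:
    resolver = get_ontology_resolver_from_env(**env_config.to_dict())
else:
    resolver = get_default_ontology_resolver()  # rdflib + fuzzy, no ontology file
```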
diff --git a/.github/workflows/disable_independent_workflows.sh b/.github/workflows/disable_independent_workflows.sh
index 693c3092d..ff57da80d 100755
--- a/.github/workflows/disable_independent_workflows.sh
+++ b/.github/workflows/disable_independent_workflows.sh
@@ -10,7 +10,7 @@ WORKFLOWS=(
"test_kuzu.yml"
"test_multimetric_qa_eval_run.yaml"
"test_graphrag_vs_rag_notebook.yml"
- "test_gemini.yml"
+ "test_llms.yml"
"test_multimedia_example.yaml"
"test_deduplication.yml"
"test_eval_framework.yml"
diff --git a/.github/workflows/test_gemini.yml b/.github/workflows/test_gemini.yml
deleted file mode 100644
index 544e15a5e..000000000
--- a/.github/workflows/test_gemini.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: test | gemini
-
-on:
- workflow_call:
-
-jobs:
- test-gemini:
- name: Run Gemini Test
- runs-on: ubuntu-22.04
- steps:
- - name: Check out repository
- uses: actions/checkout@v4
-
- - name: Cognee Setup
- uses: ./.github/actions/cognee_setup
- with:
- python-version: '3.11.x'
-
- - name: Run Gemini Simple Example
- env:
- LLM_PROVIDER: "gemini"
- LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
- LLM_MODEL: "gemini/gemini-1.5-flash"
- EMBEDDING_PROVIDER: "gemini"
- EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
- EMBEDDING_MODEL: "gemini/text-embedding-004"
- EMBEDDING_DIMENSIONS: "768"
- EMBEDDING_MAX_TOKENS: "8076"
- run: uv run python ./examples/python/simple_example.py
diff --git a/.github/workflows/test_llms.yml b/.github/workflows/test_llms.yml
new file mode 100644
index 000000000..5a0f947c9
--- /dev/null
+++ b/.github/workflows/test_llms.yml
@@ -0,0 +1,86 @@
+name: LLM Test Suites
+
+permissions:
+ contents: read
+
+on:
+ workflow_call:
+
+env:
+ RUNTIME__LOG_LEVEL: ERROR
+ ENV: 'dev'
+
+jobs:
+ test-gemini:
+ name: Run Gemini Test
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Check out repository
+ uses: actions/checkout@v4
+
+ - name: Cognee Setup
+ uses: ./.github/actions/cognee_setup
+ with:
+ python-version: '3.11.x'
+
+ - name: Run Gemini Simple Example
+ env:
+ LLM_PROVIDER: "gemini"
+ LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+ LLM_MODEL: "gemini/gemini-1.5-flash"
+ EMBEDDING_PROVIDER: "gemini"
+ EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
+ EMBEDDING_MODEL: "gemini/text-embedding-004"
+ EMBEDDING_DIMENSIONS: "768"
+ EMBEDDING_MAX_TOKENS: "8076"
+ run: uv run python ./examples/python/simple_example.py
+
+ test-fastembed:
+ name: Run Fastembed Test
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Check out repository
+ uses: actions/checkout@v4
+
+ - name: Cognee Setup
+ uses: ./.github/actions/cognee_setup
+ with:
+ python-version: '3.11.x'
+
+ - name: Run Fastembed Simple Example
+ env:
+ LLM_PROVIDER: "openai"
+ LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+ LLM_MODEL: ${{ secrets.LLM_MODEL }}
+ LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+ LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+ EMBEDDING_PROVIDER: "fastembed"
+ EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
+ EMBEDDING_DIMENSIONS: "384"
+ EMBEDDING_MAX_TOKENS: "256"
+ run: uv run python ./examples/python/simple_example.py
+
+ test-openrouter:
+ name: Run OpenRouter Test
+ runs-on: ubuntu-22.04
+ steps:
+ - name: Check out repository
+ uses: actions/checkout@v4
+
+ - name: Cognee Setup
+ uses: ./.github/actions/cognee_setup
+ with:
+ python-version: '3.11.x'
+
+ - name: Run OpenRouter Simple Example
+ env:
+ LLM_PROVIDER: "custom"
+ LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+ LLM_MODEL: "openrouter/x-ai/grok-code-fast-1"
+ LLM_ENDPOINT: "https://openrouter.ai/api/v1"
+ EMBEDDING_PROVIDER: "openai"
+ EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+ EMBEDDING_MODEL: "openai/text-embedding-3-large"
+ EMBEDDING_DIMENSIONS: "3072"
+ EMBEDDING_MAX_TOKENS: "8191"
+ run: uv run python ./examples/python/simple_example.py
diff --git a/.github/workflows/test_openrouter.yml b/.github/workflows/test_openrouter.yml
deleted file mode 100644
index 9c2dcdebe..000000000
--- a/.github/workflows/test_openrouter.yml
+++ /dev/null
@@ -1,30 +0,0 @@
-name: test | openrouter
-
-on:
- workflow_call:
-
-jobs:
- test-openrouter:
- name: Run OpenRouter Test
- runs-on: ubuntu-22.04
- steps:
- - name: Check out repository
- uses: actions/checkout@v4
-
- - name: Cognee Setup
- uses: ./.github/actions/cognee_setup
- with:
- python-version: '3.11.x'
-
- - name: Run OpenRouter Simple Example
- env:
- LLM_PROVIDER: "custom"
- LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
- LLM_MODEL: "openrouter/x-ai/grok-code-fast-1"
- LLM_ENDPOINT: "https://openrouter.ai/api/v1"
- EMBEDDING_PROVIDER: "openai"
- EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }}
- EMBEDDING_MODEL: "openai/text-embedding-3-large"
- EMBEDDING_DIMENSIONS: "3072"
- EMBEDDING_MAX_TOKENS: "8191"
- run: uv run python ./examples/python/simple_example.py
diff --git a/.github/workflows/test_suites.yml b/.github/workflows/test_suites.yml
index 86f89249d..ff18f2962 100644
--- a/.github/workflows/test_suites.yml
+++ b/.github/workflows/test_suites.yml
@@ -115,16 +115,10 @@ jobs:
secrets: inherit
# Additional LLM tests
- gemini-tests:
- name: Gemini Tests
- needs: [basic-tests, e2e-tests]
- uses: ./.github/workflows/test_gemini.yml
- secrets: inherit
-
- openrouter-tests:
- name: OpenRouter Tests
- needs: [basic-tests, e2e-tests]
- uses: ./.github/workflows/test_openrouter.yml
+ llm-tests:
+ name: LLM Test Suite
+    needs: [basic-tests, e2e-tests]
+ uses: ./.github/workflows/test_llms.yml
secrets: inherit
# Ollama tests moved to the end
@@ -138,8 +132,7 @@ jobs:
different-operating-systems-tests,
vector-db-tests,
example-tests,
- gemini-tests,
- openrouter-tests,
+ llm-tests,
mcp-test,
relational-db-migration-tests,
docker-compose-test,
@@ -161,8 +154,7 @@ jobs:
example-tests,
db-examples-tests,
mcp-test,
- gemini-tests,
- openrouter-tests,
+ llm-tests,
ollama-tests,
relational-db-migration-tests,
docker-compose-test,
@@ -183,8 +175,7 @@ jobs:
"${{ needs.example-tests.result }}" == "success" &&
"${{ needs.db-examples-tests.result }}" == "success" &&
"${{ needs.relational-db-migration-tests.result }}" == "success" &&
- "${{ needs.gemini-tests.result }}" == "success" &&
- "${{ needs.openrouter-tests.result }}" == "success" &&
+ "${{ needs.llm-tests.result }}" == "success" &&
"${{ needs.docker-compose-test.result }}" == "success" &&
"${{ needs.docker-ci-test.result }}" == "success" &&
"${{ needs.ollama-tests.result }}" == "success" ]]; then
diff --git a/README.md b/README.md
index ae1f5f365..41bd1d4ea 100644
--- a/README.md
+++ b/README.md
@@ -176,16 +176,6 @@ You can also cognify your files and query using cognee UI.
-### Installation for UI
-
-To use the cognee UI with full functionality, you need to install cognee with API dependencies:
-
-```bash
-pip install 'cognee[api]'
-```
-
-The UI requires backend server functionality (uvicorn and other API dependencies) which are not included in the default cognee installation to keep it lightweight.
-
### Running the UI
Try cognee UI by running ``` cognee-cli -ui ``` command on your terminal.
diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py
index e4f91b44c..1292d243a 100644
--- a/cognee/api/v1/cognify/cognify.py
+++ b/cognee/api/v1/cognify/cognify.py
@@ -3,6 +3,7 @@ from pydantic import BaseModel
from typing import Union, Optional
from uuid import UUID
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.shared.logging_utils import get_logger
from cognee.shared.data_models import KnowledgeGraph
from cognee.infrastructure.llm import get_max_chunk_tokens
@@ -10,7 +11,11 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.chunking.TextChunker import TextChunker
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.ontology_config import Config
+from cognee.modules.ontology.get_default_ontology_resolver import (
+ get_default_ontology_resolver,
+ get_ontology_resolver_from_env,
+)
from cognee.modules.users.models import User
from cognee.tasks.documents import (
@@ -39,7 +44,7 @@ async def cognify(
graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker,
chunk_size: int = None,
- ontology_file_path: Optional[str] = None,
+ config: Config = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
run_in_background: bool = False,
@@ -100,8 +105,6 @@ async def cognify(
Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
Default limits: ~512-8192 tokens depending on models.
Smaller chunks = more granular but potentially fragmented knowledge.
- ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
- Useful for specialized fields like medical or legal documents.
vector_db_config: Custom vector database configuration for embeddings storage.
graph_db_config: Custom graph database configuration for relationship storage.
run_in_background: If True, starts processing asynchronously and returns immediately.
@@ -188,11 +191,28 @@ async def cognify(
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
"""
+ if config is None:
+ ontology_config = get_ontology_env_config()
+ if (
+ ontology_config.ontology_file_path
+ and ontology_config.ontology_resolver
+ and ontology_config.matching_strategy
+ ):
+ config: Config = {
+ "ontology_config": {
+ "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+ }
+ }
+ else:
+ config: Config = {
+ "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+ }
+
if temporal_cognify:
tasks = await get_temporal_tasks(user, chunker, chunk_size)
else:
tasks = await get_default_tasks(
- user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+ user, graph_model, chunker, chunk_size, config, custom_prompt
)
# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -216,9 +236,26 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker,
chunk_size: int = None,
- ontology_file_path: Optional[str] = None,
+ config: Config = None,
custom_prompt: Optional[str] = None,
) -> list[Task]:
+ if config is None:
+ ontology_config = get_ontology_env_config()
+ if (
+ ontology_config.ontology_file_path
+ and ontology_config.ontology_resolver
+ and ontology_config.matching_strategy
+ ):
+ config: Config = {
+ "ontology_config": {
+ "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+ }
+ }
+ else:
+ config: Config = {
+ "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+ }
+
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -230,7 +267,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
Task(
extract_graph_from_data,
graph_model=graph_model,
- ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
+ config=config,
custom_prompt=custom_prompt,
task_config={"batch_size": 10},
), # Generate knowledge graphs from the document chunks.
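A sketch of the new calling convention, passing an explicit resolver through the `Config` TypedDict instead of the removed `ontology_file_path` argument (the ontology file path is hypothetical):

```python
import asyncio

import cognee
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver

config: Config = {
    "ontology_config": {
        "ontology_resolver": RDFLibOntologyResolver(
            ontology_file="/path/to/domain.owl",  # hypothetical ontology file
            matching_strategy=FuzzyMatchingStrategy(cutoff=0.8),
        )
    }
}


async def main():
    await cognee.add("Some domain text.")
    await cognee.cognify(config=config)


asyncio.run(main())
```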
diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py
index dcebce012..7209c6036 100644
--- a/cognee/api/v1/search/search.py
+++ b/cognee/api/v1/search/search.py
@@ -82,6 +82,9 @@ async def search(
Best for: General-purpose queries or when you're unsure which search type is best.
Returns: The results from the automatically selected search type.
+ **CHUNKS_LEXICAL**:
+ Token-based lexical chunk search (e.g., Jaccard). Best for: exact-term matching, stopword-aware lookups.
+ Returns: Ranked text chunks (optionally with scores).
Args:
query_text: Your question or search query in natural language.
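Assuming the usual `cognee.search` entry point, the new search type would be invoked like this sketch:

```python
import asyncio

import cognee
from cognee.modules.search.types.SearchType import SearchType


async def main():
    results = await cognee.search(
        query_type=SearchType.CHUNKS_LEXICAL,
        query_text="jaccard similarity",
    )
    print(results)


asyncio.run(main())
```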
diff --git a/cognee/base_config.py b/cognee/base_config.py
index 2e2afb2de..a2ad06249 100644
--- a/cognee/base_config.py
+++ b/cognee/base_config.py
@@ -11,7 +11,7 @@ class BaseConfig(BaseSettings):
data_root_directory: str = get_absolute_path(".data_storage")
system_root_directory: str = get_absolute_path(".cognee_system")
cache_root_directory: str = get_absolute_path(".cognee_cache")
- monitoring_tool: object = Observer.LANGFUSE
+ monitoring_tool: object = Observer.NONE
@pydantic.model_validator(mode="after")
def validate_paths(self):
@@ -30,7 +30,11 @@ class BaseConfig(BaseSettings):
# Require absolute paths for root directories
self.data_root_directory = ensure_absolute_path(self.data_root_directory)
self.system_root_directory = ensure_absolute_path(self.system_root_directory)
        self.cache_root_directory = ensure_absolute_path(self.cache_root_directory)
+ # Set monitoring tool based on available keys
+ if self.langfuse_public_key and self.langfuse_secret_key:
+ self.monitoring_tool = Observer.LANGFUSE
+
return self
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
diff --git a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
index be532232f..fb10c7eed 100644
--- a/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
+++ b/cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
@@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.storage import add_data_points
from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
async def get_default_tasks_by_indices(
@@ -33,7 +33,7 @@ async def get_no_summary_tasks(
# Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
- ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
+ ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
graph_task = Task(
extract_graph_from_data,
diff --git a/cognee/modules/data/methods/create_authorized_dataset.py b/cognee/modules/data/methods/create_authorized_dataset.py
index e43381b35..08057a6bd 100644
--- a/cognee/modules/data/methods/create_authorized_dataset.py
+++ b/cognee/modules/data/methods/create_authorized_dataset.py
@@ -6,6 +6,15 @@ from .create_dataset import create_dataset
async def create_authorized_dataset(dataset_name: str, user: User) -> Dataset:
+ """
+ Create a new dataset and give all permissions on this dataset to the given user.
+ Args:
+ dataset_name: Name of the dataset.
+ user: The user object.
+
+ Returns:
+ Dataset: The new authorized dataset.
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
diff --git a/cognee/modules/data/methods/get_authorized_dataset.py b/cognee/modules/data/methods/get_authorized_dataset.py
index 0e30b7e0e..6c97322c8 100644
--- a/cognee/modules/data/methods/get_authorized_dataset.py
+++ b/cognee/modules/data/methods/get_authorized_dataset.py
@@ -15,7 +15,7 @@ async def get_authorized_dataset(
Get a specific dataset with permissions for a user.
Args:
- user_id (UUID): user id
+ user: User object
dataset_id (UUID): dataset id
permission_type (str): permission type(read, write, delete, share), default is read
diff --git a/cognee/modules/data/methods/get_authorized_dataset_by_name.py b/cognee/modules/data/methods/get_authorized_dataset_by_name.py
index 654dcb630..ad50e25e9 100644
--- a/cognee/modules/data/methods/get_authorized_dataset_by_name.py
+++ b/cognee/modules/data/methods/get_authorized_dataset_by_name.py
@@ -11,6 +11,17 @@ from ..models import Dataset
async def get_authorized_dataset_by_name(
dataset_name: str, user: User, permission_type: str
) -> Optional[Dataset]:
+ """
+ Get a specific dataset with the given name, with permissions for a given user.
+
+ Args:
+ dataset_name: Name of the dataset.
+ user: User object.
+        permission_type (str): Permission type (read, write, delete, share).
+
+ Returns:
+ Optional[Dataset]: dataset with permissions
+ """
authorized_datasets = await get_authorized_existing_datasets([], permission_type, user)
return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None)
diff --git a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
index 125f59e72..3b01f5af4 100644
--- a/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
+++ b/cognee/modules/graph/utils/expand_with_nodes_and_edges.py
@@ -7,8 +7,14 @@ from cognee.modules.engine.utils import (
generate_node_id,
generate_node_name,
)
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.get_default_ontology_resolver import (
+ get_default_ontology_resolver,
+ get_ontology_resolver_from_env,
+)
def _create_node_key(node_id: str, category: str) -> str:
@@ -83,7 +89,7 @@ def _process_ontology_edges(
def _create_type_node(
node_type: str,
- ontology_resolver: OntologyResolver,
+ ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@@ -141,7 +147,7 @@ def _create_entity_node(
node_name: str,
node_description: str,
type_node: EntityType,
- ontology_resolver: OntologyResolver,
+ ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@@ -198,7 +204,7 @@ def _create_entity_node(
def _process_graph_nodes(
data_chunk: DocumentChunk,
graph: KnowledgeGraph,
- ontology_resolver: OntologyResolver,
+ ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@@ -277,7 +283,7 @@ def _process_graph_edges(
def expand_with_nodes_and_edges(
data_chunks: list[DocumentChunk],
chunk_graphs: list[KnowledgeGraph],
- ontology_resolver: OntologyResolver = None,
+ ontology_resolver: BaseOntologyResolver = None,
existing_edges_map: Optional[dict[str, bool]] = None,
):
"""
@@ -296,8 +302,8 @@ def expand_with_nodes_and_edges(
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
from the chunk content.
- ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
- types against an ontology. If None, a default OntologyResolver is created.
+ ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and
+ types against an ontology. If None, a default RDFLibOntologyResolver is created.
Defaults to None.
existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@@ -320,7 +326,15 @@ def expand_with_nodes_and_edges(
existing_edges_map = {}
if ontology_resolver is None:
- ontology_resolver = OntologyResolver()
+ ontology_config = get_ontology_env_config()
+ if (
+ ontology_config.ontology_file_path
+ and ontology_config.ontology_resolver
+ and ontology_config.matching_strategy
+ ):
+ ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict())
+ else:
+ ontology_resolver = get_default_ontology_resolver()
added_nodes_map = {}
added_ontology_nodes_map = {}
diff --git a/cognee/modules/graph/utils/retrieve_existing_edges.py b/cognee/modules/graph/utils/retrieve_existing_edges.py
index 20cb30a26..f0aefacd4 100644
--- a/cognee/modules/graph/utils/retrieve_existing_edges.py
+++ b/cognee/modules/graph/utils/retrieve_existing_edges.py
@@ -23,8 +23,6 @@ async def retrieve_existing_edges(
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
data chunk. Each graph contains nodes (entities) and edges (relationships) that
were extracted from the chunk content.
- graph_engine (GraphDBInterface): Interface to the graph database that will be queried
- to check for existing edges. Must implement the has_edges() method.
Returns:
dict[str, bool]: A mapping of edge keys to boolean values indicating existence.
diff --git a/cognee/modules/observability/get_observe.py b/cognee/modules/observability/get_observe.py
index db3655482..9ee44e46a 100644
--- a/cognee/modules/observability/get_observe.py
+++ b/cognee/modules/observability/get_observe.py
@@ -9,3 +9,17 @@ def get_observe():
from langfuse.decorators import observe
return observe
+    else:
+        # Fall back to a no-op decorator (handles keyword arguments) for any other monitor
+ def no_op_decorator(*args, **kwargs):
+ if len(args) == 1 and callable(args[0]) and not kwargs:
+ # Direct decoration: @observe
+ return args[0]
+ else:
+ # Parameterized decoration: @observe(as_type="generation")
+ def decorator(func):
+ return func
+
+ return decorator
+
+ return no_op_decorator
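A quick sketch of how the no-op path behaves for both decoration styles, when monitoring resolves to `Observer.NONE`:

```python
from cognee.modules.observability.get_observe import get_observe

observe = get_observe()  # the no-op decorator when no monitoring tool is configured


@observe  # direct decoration: the function is returned unchanged
def plain():
    return "ok"


@observe(as_type="generation")  # parameterized decoration: also a no-op
def typed():
    return "ok"


assert plain() == typed() == "ok"
```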
diff --git a/cognee/modules/observability/observers.py b/cognee/modules/observability/observers.py
index 7bd0380ec..9c4aff43b 100644
--- a/cognee/modules/observability/observers.py
+++ b/cognee/modules/observability/observers.py
@@ -4,6 +4,7 @@ from enum import Enum
class Observer(str, Enum):
"""Monitoring tools"""
+ NONE = "none"
LANGFUSE = "langfuse"
LLMLITE = "llmlite"
LANGSMITH = "langsmith"
diff --git a/cognee/modules/ontology/base_ontology_resolver.py b/cognee/modules/ontology/base_ontology_resolver.py
new file mode 100644
index 000000000..7005e6981
--- /dev/null
+++ b/cognee/modules/ontology/base_ontology_resolver.py
@@ -0,0 +1,42 @@
+from abc import ABC, abstractmethod
+from typing import List, Tuple, Optional
+
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
+
+
+class BaseOntologyResolver(ABC):
+ """Abstract base class for ontology resolvers."""
+
+ def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
+ """Initialize the ontology resolver with a matching strategy.
+
+ Args:
+ matching_strategy: The strategy to use for entity matching.
+ Defaults to FuzzyMatchingStrategy if None.
+ """
+ self.matching_strategy = matching_strategy or FuzzyMatchingStrategy()
+
+ @abstractmethod
+ def build_lookup(self) -> None:
+ """Build the lookup dictionary for ontology entities."""
+ pass
+
+ @abstractmethod
+ def refresh_lookup(self) -> None:
+ """Refresh the lookup dictionary."""
+ pass
+
+ @abstractmethod
+ def find_closest_match(self, name: str, category: str) -> Optional[str]:
+ """Find the closest match for a given name in the specified category."""
+ pass
+
+ @abstractmethod
+ def get_subgraph(
+ self, node_name: str, node_type: str = "individuals", directed: bool = True
+ ) -> Tuple[
+ List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+ ]:
+ """Get a subgraph for the given node."""
+ pass
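To illustrate the contract, a toy resolver backed by an in-memory lookup; `DictOntologyResolver` and its data are hypothetical, for illustration only:

```python
from typing import List, Optional, Tuple

from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode


class DictOntologyResolver(BaseOntologyResolver):
    """Hypothetical resolver backed by a plain dict instead of an RDF graph."""

    def __init__(self, entities: dict, matching_strategy=None):
        super().__init__(matching_strategy)  # defaults to FuzzyMatchingStrategy
        self.entities = entities  # e.g. {"classes": [...], "individuals": [...]}
        self.build_lookup()

    def build_lookup(self) -> None:
        self.lookup = {
            category: {name.lower(): name for name in names}
            for category, names in self.entities.items()
        }

    def refresh_lookup(self) -> None:
        self.build_lookup()

    def find_closest_match(self, name: str, category: str) -> Optional[str]:
        candidates = list(self.lookup.get(category, {}))
        return self.matching_strategy.find_match(name.lower().strip(), candidates)

    def get_subgraph(
        self, node_name: str, node_type: str = "individuals", directed: bool = True
    ) -> Tuple[
        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
    ]:
        # This toy resolver has no hierarchy to walk.
        return [], [], None
```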
diff --git a/cognee/modules/ontology/get_default_ontology_resolver.py b/cognee/modules/ontology/get_default_ontology_resolver.py
new file mode 100644
index 000000000..f9aebe59a
--- /dev/null
+++ b/cognee/modules/ontology/get_default_ontology_resolver.py
@@ -0,0 +1,41 @@
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+
+def get_default_ontology_resolver() -> BaseOntologyResolver:
+ return RDFLibOntologyResolver(ontology_file=None, matching_strategy=FuzzyMatchingStrategy())
+
+
+def get_ontology_resolver_from_env(
+ ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
+) -> BaseOntologyResolver:
+ """
+ Create and return an ontology resolver instance based on environment parameters.
+
+ Currently, this function supports only the RDFLib-based ontology resolver
+ with a fuzzy matching strategy.
+
+ Args:
+ ontology_resolver (str): The ontology resolver type to use.
+ Supported value: "rdflib".
+ matching_strategy (str): The matching strategy to apply.
+ Supported value: "fuzzy".
+ ontology_file_path (str): Path to the ontology file required for the resolver.
+
+ Returns:
+ BaseOntologyResolver: An instance of the requested ontology resolver.
+
+ Raises:
+ EnvironmentError: If the provided resolver or strategy is unsupported,
+ or if required parameters are missing.
+ """
+ if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
+ return RDFLibOntologyResolver(
+ matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
+ )
+ else:
+ raise EnvironmentError(
+ f"Unsupported ontology resolver: {ontology_resolver}. "
+ f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
+ )
diff --git a/cognee/modules/ontology/matching_strategies.py b/cognee/modules/ontology/matching_strategies.py
new file mode 100644
index 000000000..0e8ba7b96
--- /dev/null
+++ b/cognee/modules/ontology/matching_strategies.py
@@ -0,0 +1,53 @@
+import difflib
+from abc import ABC, abstractmethod
+from typing import List, Optional
+
+
+class MatchingStrategy(ABC):
+ """Abstract base class for ontology entity matching strategies."""
+
+ @abstractmethod
+ def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+ """Find the best match for a given name from a list of candidates.
+
+ Args:
+ name: The name to match
+ candidates: List of candidate names to match against
+
+ Returns:
+ The best matching candidate name, or None if no match found
+ """
+ pass
+
+
+class FuzzyMatchingStrategy(MatchingStrategy):
+ """Fuzzy matching strategy using difflib for approximate string matching."""
+
+ def __init__(self, cutoff: float = 0.8):
+ """Initialize fuzzy matching strategy.
+
+ Args:
+ cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid
+ """
+ self.cutoff = cutoff
+
+ def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+ """Find the closest fuzzy match for a given name.
+
+ Args:
+ name: The normalized name to match
+ candidates: List of normalized candidate names
+
+ Returns:
+ The best matching candidate name, or None if no match meets the cutoff
+ """
+ if not candidates:
+ return None
+
+ # Check for exact match first
+ if name in candidates:
+ return name
+
+ # Find fuzzy match
+ best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
+ return best_match[0] if best_match else None
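A short demonstration of the strategy in isolation (candidate names follow the resolver's lowercase/underscore normalization):

```python
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

strategy = FuzzyMatchingStrategy(cutoff=0.8)
candidates = ["heart_attack", "myocardial_infarction", "stroke"]

print(strategy.find_match("heart_attack", candidates))  # exact hit -> "heart_attack"
print(strategy.find_match("heart_atack", candidates))   # fuzzy hit -> "heart_attack"
print(strategy.find_match("diabetes", candidates))      # below cutoff -> None
```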
diff --git a/cognee/modules/ontology/models.py b/cognee/modules/ontology/models.py
new file mode 100644
index 000000000..eefa9e5dd
--- /dev/null
+++ b/cognee/modules/ontology/models.py
@@ -0,0 +1,20 @@
+from typing import Any
+
+
+class AttachedOntologyNode:
+ """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+
+ def __init__(self, uri: Any, category: str):
+ self.uri = uri
+ self.name = self._extract_name(uri)
+ self.category = category
+
+ @staticmethod
+ def _extract_name(uri: Any) -> str:
+ uri_str = str(uri)
+ if "#" in uri_str:
+ return uri_str.split("#")[-1]
+ return uri_str.rstrip("/").split("/")[-1]
+
+ def __repr__(self):
+ return f"AttachedOntologyNode(name={self.name}, category={self.category})"
diff --git a/cognee/modules/ontology/ontology_config.py b/cognee/modules/ontology/ontology_config.py
new file mode 100644
index 000000000..397411edc
--- /dev/null
+++ b/cognee/modules/ontology/ontology_config.py
@@ -0,0 +1,24 @@
+from typing import TypedDict, Optional
+
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+
+class OntologyConfig(TypedDict, total=False):
+ """Configuration containing ontology resolver.
+
+ Attributes:
+ ontology_resolver: The ontology resolver instance to use
+ """
+
+ ontology_resolver: Optional[BaseOntologyResolver]
+
+
+class Config(TypedDict, total=False):
+ """Top-level configuration dictionary.
+
+ Attributes:
+ ontology_config: Configuration containing ontology resolver
+ """
+
+ ontology_config: Optional[OntologyConfig]
diff --git a/cognee/modules/ontology/ontology_env_config.py b/cognee/modules/ontology/ontology_env_config.py
new file mode 100644
index 000000000..a351b35e7
--- /dev/null
+++ b/cognee/modules/ontology/ontology_env_config.py
@@ -0,0 +1,45 @@
+"""This module contains the configuration for ontology handling."""
+
+from functools import lru_cache
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class OntologyEnvConfig(BaseSettings):
+ """
+ Represents the configuration for ontology handling, including parameters for
+ ontology file storage and resolution/matching strategies.
+
+ Public methods:
+ - to_dict
+
+ Instance variables:
+ - ontology_resolver
+    - matching_strategy
+ - ontology_file_path
+ - model_config
+ """
+
+ ontology_resolver: str = "rdflib"
+ matching_strategy: str = "fuzzy"
+ ontology_file_path: str = ""
+
+ model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
+
+ def to_dict(self) -> dict:
+ """
+ Return the configuration as a dictionary.
+ """
+ return {
+ "ontology_resolver": self.ontology_resolver,
+ "matching_strategy": self.matching_strategy,
+ "ontology_file_path": self.ontology_file_path,
+ }
+
+
+@lru_cache
+def get_ontology_env_config():
+ """
+ Retrieve the ontology configuration. This function utilizes caching to return a
+ singleton instance of the OntologyConfig class for efficiency.
+ """
+ return OntologyEnvConfig()
diff --git a/cognee/modules/ontology/rdf_xml/OntologyResolver.py b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
similarity index 85%
rename from cognee/modules/ontology/rdf_xml/OntologyResolver.py
rename to cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
index 7f3fa004d..2a7a03751 100644
--- a/cognee/modules/ontology/rdf_xml/OntologyResolver.py
+++ b/cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py
@@ -10,31 +10,26 @@ from cognee.modules.ontology.exceptions import (
FindClosestMatchError,
GetSubgraphError,
)
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
logger = get_logger("OntologyAdapter")
-class AttachedOntologyNode:
- """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+class RDFLibOntologyResolver(BaseOntologyResolver):
+ """RDFLib-based ontology resolver implementation.
- def __init__(self, uri: URIRef, category: str):
- self.uri = uri
- self.name = self._extract_name(uri)
- self.category = category
+ This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
+ It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
+ """
- @staticmethod
- def _extract_name(uri: URIRef) -> str:
- uri_str = str(uri)
- if "#" in uri_str:
- return uri_str.split("#")[-1]
- return uri_str.rstrip("/").split("/")[-1]
-
- def __repr__(self):
- return f"AttachedOntologyNode(name={self.name}, category={self.category})"
-
-
-class OntologyResolver:
- def __init__(self, ontology_file: Optional[str] = None):
+ def __init__(
+ self,
+ ontology_file: Optional[str] = None,
+ matching_strategy: Optional[MatchingStrategy] = None,
+ ) -> None:
+ super().__init__(matching_strategy)
self.ontology_file = ontology_file
try:
if ontology_file and os.path.exists(ontology_file):
@@ -60,7 +55,7 @@ class OntologyResolver:
name = uri_str.rstrip("/").split("/")[-1]
return name.lower().replace(" ", "_").strip()
- def build_lookup(self):
+ def build_lookup(self) -> None:
try:
classes: Dict[str, URIRef] = {}
individuals: Dict[str, URIRef] = {}
@@ -97,7 +92,7 @@ class OntologyResolver:
logger.error("Failed to build lookup dictionary: %s", str(e))
raise RuntimeError("Lookup build failed") from e
- def refresh_lookup(self):
+ def refresh_lookup(self) -> None:
self.build_lookup()
logger.info("Ontology lookup refreshed.")
@@ -105,13 +100,8 @@ class OntologyResolver:
try:
normalized_name = name.lower().replace(" ", "_").strip()
possible_matches = list(self.lookup.get(category, {}).keys())
- if normalized_name in possible_matches:
- return normalized_name
- best_match = difflib.get_close_matches(
- normalized_name, possible_matches, n=1, cutoff=0.8
- )
- return best_match[0] if best_match else None
+ return self.matching_strategy.find_match(normalized_name, possible_matches)
except Exception as e:
logger.error("Error in find_closest_match: %s", str(e))
raise FindClosestMatchError() from e
@@ -125,7 +115,9 @@ class OntologyResolver:
def get_subgraph(
self, node_name: str, node_type: str = "individuals", directed: bool = True
- ) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]:
+ ) -> Tuple[
+ List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+ ]:
nodes_set = set()
edges: List[Tuple[str, str, str]] = []
visited = set()
diff --git a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
index 30d0fef71..7e3d1c124 100644
--- a/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
+++ b/cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py
@@ -11,6 +11,19 @@ from cognee.modules.data.methods import (
async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
+ """
+    Handle dataset creation and authorization for Cognee.
+    Verifies that the provided user has the necessary permission for the given dataset.
+    If the dataset does not exist, it is created and the creating user is granted permissions on it.
+
+ Args:
+ dataset_id: Id of the dataset.
+ dataset_name: Name of the dataset.
+        user: Cognee user the request is being processed for; if None, the default user is used.
+
+ Returns:
+ Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
+ """
if not user:
user = await get_default_user()
diff --git a/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py b/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
index 4f6fb8254..f91064995 100644
--- a/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
+++ b/cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py
@@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets(
datasets: Dataset names or Dataset UUID (in case Datasets already exist)
Returns:
-
+ Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets.
"""
# If no user is provided use default user
if user is None:
diff --git a/cognee/modules/retrieval/jaccard_retrival.py b/cognee/modules/retrieval/jaccard_retrival.py
new file mode 100644
index 000000000..91d2b67f7
--- /dev/null
+++ b/cognee/modules/retrieval/jaccard_retrival.py
@@ -0,0 +1,59 @@
+import re
+from collections import Counter
+from typing import Optional
+
+from cognee.modules.retrieval.lexical_retriever import LexicalRetriever
+
+
+class JaccardChunksRetriever(LexicalRetriever):
+ """
+ Retriever that specializes LexicalRetriever to use Jaccard similarity.
+ """
+
+ def __init__(self, top_k: int = 10, with_scores: bool = False,
+ stop_words: Optional[list[str]] = None, multiset_jaccard: bool = False):
+ """
+ Parameters
+ ----------
+ top_k : int
+ Number of top results to return.
+ with_scores : bool
+ If True, return (payload, score) pairs. Otherwise, only payloads.
+ stop_words : list[str], optional
+ List of tokens to filter out.
+ multiset_jaccard : bool
+ If True, use multiset Jaccard (frequency aware).
+ """
+ self.stop_words = {t.lower() for t in stop_words} if stop_words else set()
+ self.multiset_jaccard = multiset_jaccard
+
+ super().__init__(
+ tokenizer=self._tokenizer,
+ scorer=self._scorer,
+ top_k=top_k,
+ with_scores=with_scores
+ )
+
+ def _tokenizer(self, text: str) -> list[str]:
+ """
+        Tokenizer: lowercases, splits text into word-character runs, and filters stop words.
+ """
+ tokens = re.findall(r"\w+", text.lower())
+ return [t for t in tokens if t not in self.stop_words]
+
+ def _scorer(self, query_tokens: list[str], chunk_tokens: list[str]) -> float:
+ """
+ Jaccard similarity scorer.
+ - If multiset_jaccard=True, uses frequency-aware Jaccard.
+ - Otherwise, normal set Jaccard.
+ """
+ if self.multiset_jaccard:
+ q_counts, c_counts = Counter(query_tokens), Counter(chunk_tokens)
+ numerator = sum(min(q_counts[t], c_counts[t]) for t in set(q_counts) | set(c_counts))
+ denominator = sum(max(q_counts[t], c_counts[t]) for t in set(q_counts) | set(c_counts))
+ return numerator / denominator if denominator else 0.0
+ else:
+ q_set, c_set = set(query_tokens), set(chunk_tokens)
+ if not q_set or not c_set:
+ return 0.0
+ return len(q_set & c_set) / len(q_set | c_set)
diff --git a/cognee/modules/retrieval/lexical_retriever.py b/cognee/modules/retrieval/lexical_retriever.py
new file mode 100644
index 000000000..2292b64c8
--- /dev/null
+++ b/cognee/modules/retrieval/lexical_retriever.py
@@ -0,0 +1,117 @@
+import asyncio
+from typing import Any, Callable, Optional
+from heapq import nlargest
+
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.modules.retrieval.base_retriever import BaseRetriever
+from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from cognee.shared.logging_utils import get_logger
+
+
+logger = get_logger("LexicalRetriever")
+
+
+class LexicalRetriever(BaseRetriever):
+
+ def __init__(self, tokenizer: Callable, scorer: Callable, top_k: int = 10, with_scores: bool = False):
+ if not callable(tokenizer) or not callable(scorer):
+ raise TypeError("tokenizer and scorer must be callables")
+ if not isinstance(top_k, int) or top_k <= 0:
+ raise ValueError("top_k must be a positive integer")
+
+ self.tokenizer = tokenizer
+ self.scorer = scorer
+ self.top_k = top_k
+ self.with_scores = bool(with_scores)
+
+ # Cache keyed by dataset context
+ self.chunks: dict[str, Any] = {} # {chunk_id: tokens}
+ self.payloads: dict[str, Any] = {} # {chunk_id: original_document}
+ self._initialized = False
+ self._init_lock = asyncio.Lock()
+
+ async def initialize(self):
+ """Initialize retriever by reading all DocumentChunks from graph_engine."""
+ async with self._init_lock:
+ if self._initialized:
+ return
+
+ logger.info("Initializing LexicalRetriever by loading DocumentChunks from graph engine")
+
+ try:
+ graph_engine = await get_graph_engine()
+ nodes, _ = await graph_engine.get_filtered_graph_data([{"type": ["DocumentChunk"]}])
+ except Exception as e:
+ logger.error("Graph engine initialization failed")
+ raise NoDataError("Graph engine initialization failed") from e
+
+ chunk_count = 0
+ for node in nodes:
+ try:
+ chunk_id, document = node
+ except Exception:
+ logger.warning("Skipping node with unexpected shape: %r", node)
+ continue
+
+ if document.get("type") == "DocumentChunk" and document.get("text"):
+ try:
+ tokens = self.tokenizer(document["text"])
+ if not tokens:
+ continue
+ self.chunks[str(document.get("id",chunk_id))] = tokens
+ self.payloads[str(document.get("id",chunk_id))] = document
+ chunk_count += 1
+ except Exception as e:
+ logger.error("Tokenizer failed for chunk %s: %s", chunk_id, str(e))
+
+ if chunk_count == 0:
+ logger.error("Initialization completed but no valid chunks were loaded.")
+ raise NoDataError("No valid chunks loaded during initialization.")
+
+ self._initialized = True
+ logger.info("Initialized with %d document chunks", len(self.chunks))
+
+ async def get_context(self, query: str) -> Any:
+ """Retrieves relevant chunks for the given query."""
+ if not self._initialized:
+ await self.initialize()
+
+ if not self.chunks:
+ logger.warning("No chunks available in retriever")
+ return []
+
+ try:
+ query_tokens = self.tokenizer(query)
+ except Exception as e:
+ logger.error("Failed to tokenize query: %s", str(e))
+ return []
+
+ if not query_tokens:
+ logger.warning("Query produced no tokens")
+ return []
+
+ results = []
+ for chunk_id, chunk_tokens in self.chunks.items():
+ try:
+ score = self.scorer(query_tokens, chunk_tokens)
+ if not isinstance(score, (int, float)):
+ logger.warning("Non-numeric score for chunk %s → treated as 0.0", chunk_id)
+ score = 0.0
+ except Exception as e:
+ logger.error("Scorer failed for chunk %s: %s", chunk_id, str(e))
+ score = 0.0
+ results.append((chunk_id, score))
+
+ top_results = nlargest(self.top_k, results, key=lambda x: x[1])
+ logger.info("Retrieved %d/%d chunks for query (len=%d)", len(top_results), len(results), len(query_tokens))
+
+ if self.with_scores:
+ return [(self.payloads[chunk_id], score) for chunk_id, score in top_results]
+ else:
+ return [self.payloads[chunk_id] for chunk_id, _ in top_results]
+
+ async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
+ """Returns context for the given query (retrieves if not provided)."""
+ if context is None:
+ context = await self.get_context(query)
+ return context
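A sketch of plugging custom callables into the retriever; it must run in an async context after cognify has stored `DocumentChunk` nodes, and the overlap scorer here is illustrative, not part of this diff:

```python
import asyncio

from cognee.modules.retrieval.lexical_retriever import LexicalRetriever


def tokenizer(text: str) -> list[str]:
    return text.lower().split()


def overlap_scorer(query_tokens: list[str], chunk_tokens: list[str]) -> float:
    # Fraction of query tokens that also appear in the chunk.
    if not query_tokens:
        return 0.0
    chunk_set = set(chunk_tokens)
    return sum(token in chunk_set for token in query_tokens) / len(query_tokens)


async def main():
    retriever = LexicalRetriever(tokenizer=tokenizer, scorer=overlap_scorer, top_k=5)
    chunks = await retriever.get_completion("graph databases")
    print(chunks)


asyncio.run(main())
```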
diff --git a/cognee/modules/search/methods/get_search_type_tools.py b/cognee/modules/search/methods/get_search_type_tools.py
index 551f77a16..c5ea53a62 100644
--- a/cognee/modules/search/methods/get_search_type_tools.py
+++ b/cognee/modules/search/methods/get_search_type_tools.py
@@ -15,6 +15,7 @@ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever
+from cognee.modules.retrieval.jaccard_retrival import JaccardChunksRetriever
from cognee.modules.retrieval.graph_summary_completion_retriever import (
GraphSummaryCompletionRetriever,
)
@@ -152,6 +153,10 @@ async def get_search_type_tools(
TemporalRetriever(top_k=top_k).get_completion,
TemporalRetriever(top_k=top_k).get_context,
],
+        SearchType.CHUNKS_LEXICAL: (lambda _r=JaccardChunksRetriever(top_k=top_k): [
+            _r.get_completion,  # one shared retriever instance,
+            _r.get_context,     # so its chunk cache is built only once
+        ])(),
SearchType.CODING_RULES: [
CodingRulesRetriever(rules_nodeset_name=node_name).get_existing_rules,
],
diff --git a/cognee/modules/search/types/SearchType.py b/cognee/modules/search/types/SearchType.py
index f5a23efff..418aec0b5 100644
--- a/cognee/modules/search/types/SearchType.py
+++ b/cognee/modules/search/types/SearchType.py
@@ -17,3 +17,4 @@ class SearchType(Enum):
FEEDBACK = "FEEDBACK"
TEMPORAL = "TEMPORAL"
CODING_RULES = "CODING_RULES"
+ CHUNKS_LEXICAL = "CHUNKS_LEXICAL"
diff --git a/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py b/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py
index d8a3777b7..7960eb756 100644
--- a/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py
+++ b/cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py
@@ -9,6 +9,18 @@ from uuid import UUID
async def authorized_give_permission_on_datasets(
principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
):
+ """
+ Give permission to certain datasets to a user.
+ The request owner must have the necessary permission to share the datasets.
+ Args:
+ principal_id: Id of user to whom datasets are shared
+ dataset_ids: Ids of datasets to share
+ permission_name: Name of permission to give
+ owner_id: Id of the request owner
+
+ Returns:
+ None
+ """
# If only a single dataset UUID is provided transform it to a list
if not isinstance(dataset_ids, list):
dataset_ids = [dataset_ids]
diff --git a/cognee/modules/users/permissions/methods/check_permission_on_dataset.py b/cognee/modules/users/permissions/methods/check_permission_on_dataset.py
index 467da7154..d489417e0 100644
--- a/cognee/modules/users/permissions/methods/check_permission_on_dataset.py
+++ b/cognee/modules/users/permissions/methods/check_permission_on_dataset.py
@@ -10,6 +10,17 @@ logger = get_logger()
async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
+ """
+ Check if a user has a specific permission on a dataset.
+ Args:
+ user: User whose permission is checked
+ permission_type: Type of permission to check
+ dataset_id: Id of the dataset
+
+ Returns:
+ None
+
+ """
if user is None:
user = await get_default_user()
diff --git a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py
index a8731a773..1185dd7ad 100644
--- a/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py
+++ b/cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py
@@ -11,6 +11,16 @@ logger = get_logger()
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
+ """
+ Return a list of datasets the user has permission for.
+    If the user is part of a tenant, also return datasets their roles have permission for.
+    Args:
+        user: User whose datasets are retrieved
+        permission_type: Type of permission to check for
+
+ Returns:
+ list[Dataset]: List of datasets user has permission for
+ """
datasets = list()
# Get all datasets User has explicit access to
datasets.extend(await get_principal_datasets(user, permission_type))
diff --git a/cognee/modules/users/permissions/methods/get_document_ids_for_user.py b/cognee/modules/users/permissions/methods/get_document_ids_for_user.py
index 3b053d8e7..9b1db024e 100644
--- a/cognee/modules/users/permissions/methods/get_document_ids_for_user.py
+++ b/cognee/modules/users/permissions/methods/get_document_ids_for_user.py
@@ -8,6 +8,16 @@ from ...models import ACL, Permission
async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -> list[str]:
+ """
+    Return a list of document ids for which the user has read permission.
+ If datasets are specified, return only documents from those datasets.
+ Args:
+ user_id: Id of the user
+ datasets: List of datasets
+
+ Returns:
+        list[str]: List of document ids for which the user has read permission
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
diff --git a/cognee/modules/users/permissions/methods/get_principal.py b/cognee/modules/users/permissions/methods/get_principal.py
index 53d39651a..245190cf8 100644
--- a/cognee/modules/users/permissions/methods/get_principal.py
+++ b/cognee/modules/users/permissions/methods/get_principal.py
@@ -6,6 +6,15 @@ from ...models.Principal import Principal
async def get_principal(principal_id: UUID):
+ """
+ Return information about a user based on their id
+ Args:
+ principal_id: Id of the user
+
+ Returns:
+ principal: Information about the user (principal)
+
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
diff --git a/cognee/modules/users/permissions/methods/get_principal_datasets.py b/cognee/modules/users/permissions/methods/get_principal_datasets.py
index b2385182f..a9adb8f00 100644
--- a/cognee/modules/users/permissions/methods/get_principal_datasets.py
+++ b/cognee/modules/users/permissions/methods/get_principal_datasets.py
@@ -9,6 +9,17 @@ from ...models.ACL import ACL
async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
+ """
+ Return a list of datasets for which the user (principal) has a certain permission.
+ Args:
+ principal: Information about the user
+ permission_type: Type of permission
+
+ Returns:
+ list[Dataset]: List of datasets for which the user (principal)
+ has the permission (permission_type).
+
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
diff --git a/cognee/modules/users/permissions/methods/get_role.py b/cognee/modules/users/permissions/methods/get_role.py
index 007044c43..a703fc9f9 100644
--- a/cognee/modules/users/permissions/methods/get_role.py
+++ b/cognee/modules/users/permissions/methods/get_role.py
@@ -9,6 +9,16 @@ from ...models.Role import Role
async def get_role(tenant_id: UUID, role_name: str):
+ """
+ Return the role with the name role_name of the given tenant.
+ Args:
+ tenant_id: Id of the given tenant
+ role_name: Name of the role
+
+    Returns:
+ The role for the given tenant.
+
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
diff --git a/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py b/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py
index b6ad1291d..8dee4d782 100644
--- a/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py
+++ b/cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py
@@ -15,9 +15,9 @@ async def get_specific_user_permission_datasets(
Return a list of datasets user has given permission for. If a list of datasets is provided,
verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
Args:
- user_id:
- permission_type:
- dataset_ids:
+ user_id: Id of the user.
+ permission_type: Type of the permission.
+ dataset_ids: Ids of the provided datasets
Returns:
list[Dataset]: List of datasets user has permission for
diff --git a/cognee/modules/users/permissions/methods/get_tenant.py b/cognee/modules/users/permissions/methods/get_tenant.py
index c5bf1a633..832ff71b8 100644
--- a/cognee/modules/users/permissions/methods/get_tenant.py
+++ b/cognee/modules/users/permissions/methods/get_tenant.py
@@ -8,6 +8,15 @@ from ...models.Tenant import Tenant
async def get_tenant(tenant_id: UUID):
+ """
+ Return information about the tenant based on the given id.
+ Args:
+ tenant_id: Id of the given tenant
+
+    Returns:
+ Information about the given tenant.
+
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
diff --git a/cognee/modules/users/permissions/methods/give_default_permission_to_role.py b/cognee/modules/users/permissions/methods/give_default_permission_to_role.py
index bf3b6a9c7..9d9b41c1b 100644
--- a/cognee/modules/users/permissions/methods/give_default_permission_to_role.py
+++ b/cognee/modules/users/permissions/methods/give_default_permission_to_role.py
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
async def give_default_permission_to_role(role_id: UUID, permission_name: str):
+ """
+ Give the permission with given name to the role with the given id as a default permission.
+ Args:
+ role_id: Id of the role
+ permission_name: Name of the permission
+
+ Returns:
+ None
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
diff --git a/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py b/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py
index 57049ae2e..7baa8c244 100644
--- a/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py
+++ b/cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
+ """
+ Give the permission with given name to the tenant with the given id as a default permission.
+ Args:
+ tenant_id: Id of the tenant
+ permission_name: Name of the permission
+
+ Returns:
+ None
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
tenant = (
diff --git a/cognee/modules/users/permissions/methods/give_default_permission_to_user.py b/cognee/modules/users/permissions/methods/give_default_permission_to_user.py
index 40913ff12..545122fd0 100644
--- a/cognee/modules/users/permissions/methods/give_default_permission_to_user.py
+++ b/cognee/modules/users/permissions/methods/give_default_permission_to_user.py
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
async def give_default_permission_to_user(user_id: UUID, permission_name: str):
+ """
+ Give the permission with given name to the user with the given id as a default permission.
+ Args:
+        user_id: Id of the user
+ permission_name: Name of the permission
+
+ Returns:
+ None
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
diff --git a/cognee/modules/users/permissions/methods/give_permission_on_dataset.py b/cognee/modules/users/permissions/methods/give_permission_on_dataset.py
index 0ed536981..6d0272192 100644
--- a/cognee/modules/users/permissions/methods/give_permission_on_dataset.py
+++ b/cognee/modules/users/permissions/methods/give_permission_on_dataset.py
@@ -24,6 +24,16 @@ async def give_permission_on_dataset(
dataset_id: UUID,
permission_name: str,
):
+ """
+ Give a specific permission on a dataset to a user.
+ Args:
+ principal: User who is being given the permission on the dataset
+ dataset_id: Id of the dataset
+ permission_name: Name of permission to give
+
+ Returns:
+ None
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
diff --git a/cognee/modules/users/roles/methods/add_user_to_role.py b/cognee/modules/users/roles/methods/add_user_to_role.py
index c6d8fdb63..de5e47775 100644
--- a/cognee/modules/users/roles/methods/add_user_to_role.py
+++ b/cognee/modules/users/roles/methods/add_user_to_role.py
@@ -21,6 +21,17 @@ from cognee.modules.users.models import (
async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
+ """
+ Add a user with the given id to the role with the given id.
+ Args:
+ user_id: Id of the user.
+ role_id: Id of the role.
+ owner_id: Id of the request owner.
+
+ Returns:
+ None
+
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
diff --git a/cognee/modules/users/roles/methods/create_role.py b/cognee/modules/users/roles/methods/create_role.py
index 897c42394..bdba4ad31 100644
--- a/cognee/modules/users/roles/methods/create_role.py
+++ b/cognee/modules/users/roles/methods/create_role.py
@@ -16,6 +16,16 @@ async def create_role(
role_name: str,
owner_id: UUID,
):
+ """
+ Create a new role with the given name, if the request owner with the given id
+ has the necessary permission.
+ Args:
+ role_name: Name of the new role.
+ owner_id: Id of the request owner.
+
+ Returns:
+ None
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(owner_id)
diff --git a/cognee/modules/users/tenants/methods/add_user_to_tenant.py b/cognee/modules/users/tenants/methods/add_user_to_tenant.py
index cf0ad0535..1374067a7 100644
--- a/cognee/modules/users/tenants/methods/add_user_to_tenant.py
+++ b/cognee/modules/users/tenants/methods/add_user_to_tenant.py
@@ -13,6 +13,18 @@ from cognee.modules.users.exceptions import (
async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
+ """
+ Add a user with the given id to the tenant with the given id.
+ This can only be successful if the request owner with the given id is the tenant owner.
+ Args:
+ user_id: Id of the user.
+ tenant_id: Id of the tenant.
+ owner_id: Id of the request owner.
+
+ Returns:
+ None
+
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(user_id)
diff --git a/cognee/modules/users/tenants/methods/create_tenant.py b/cognee/modules/users/tenants/methods/create_tenant.py
index 5d68e8110..bd8abadd1 100644
--- a/cognee/modules/users/tenants/methods/create_tenant.py
+++ b/cognee/modules/users/tenants/methods/create_tenant.py
@@ -8,6 +8,16 @@ from cognee.modules.users.methods import get_user
async def create_tenant(tenant_name: str, user_id: UUID):
+ """
+    Create a new tenant with the given name for the user with the given id.
+    The user becomes the owner of the tenant.
+ Args:
+ tenant_name: Name of the new tenant.
+ user_id: Id of the user.
+
+ Returns:
+ None
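+
+    Example (illustrative sketch; the tenant name is arbitrary):
+        await create_tenant("acme", user.id)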
+ """
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
diff --git a/cognee/tasks/graph/extract_graph_from_data.py b/cognee/tasks/graph/extract_graph_from_data.py
index d81516206..e4dafe4e7 100644
--- a/cognee/tasks/graph/extract_graph_from_data.py
+++ b/cognee/tasks/graph/extract_graph_from_data.py
@@ -3,8 +3,14 @@ from typing import Type, List, Optional
from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.tasks.storage.add_data_points import add_data_points
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.ontology_config import Config
+from cognee.modules.ontology.get_default_ontology_resolver import (
+ get_default_ontology_resolver,
+ get_ontology_resolver_from_env,
+)
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.graph.utils import (
expand_with_nodes_and_edges,
@@ -24,9 +30,28 @@ async def integrate_chunk_graphs(
data_chunks: list[DocumentChunk],
chunk_graphs: list,
graph_model: Type[BaseModel],
- ontology_adapter: OntologyResolver,
+ ontology_resolver: BaseOntologyResolver,
) -> List[DocumentChunk]:
- """Updates DocumentChunk objects, integrates data points and edges into databases."""
+ """Integrate chunk graphs with ontology validation and store in databases.
+
+ This function processes document chunks and their associated knowledge graphs,
+ validates entities against an ontology resolver, and stores the integrated
+ data points and edges in the configured databases.
+
+ Args:
+ data_chunks: List of document chunks containing source data
+ chunk_graphs: List of knowledge graphs corresponding to each chunk
+ graph_model: Pydantic model class for graph data validation
+ ontology_resolver: Resolver for validating entities against ontology
+
+ Returns:
+ List of updated DocumentChunk objects with integrated data
+
+ Raises:
+ InvalidChunkGraphInputError: If input validation fails
+ InvalidGraphModelError: If graph model validation fails
+ InvalidOntologyAdapterError: If ontology resolver validation fails
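+
+    Example (illustrative sketch; assumes chunks and graphs were produced upstream):
+        chunks = await integrate_chunk_graphs(
+            data_chunks, chunk_graphs, KnowledgeGraph, get_default_ontology_resolver()
+        )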
+ """
if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
@@ -36,9 +61,9 @@ async def integrate_chunk_graphs(
)
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
raise InvalidGraphModelError(graph_model)
- if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
+ if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"):
raise InvalidOntologyAdapterError(
- type(ontology_adapter).__name__ if ontology_adapter else "None"
+ type(ontology_resolver).__name__ if ontology_resolver else "None"
)
graph_engine = await get_graph_engine()
@@ -55,7 +80,7 @@ async def integrate_chunk_graphs(
)
graph_nodes, graph_edges = expand_with_nodes_and_edges(
- data_chunks, chunk_graphs, ontology_adapter, existing_edges_map
+ data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
)
if len(graph_nodes) > 0:
@@ -70,7 +95,7 @@ async def integrate_chunk_graphs(
async def extract_graph_from_data(
data_chunks: List[DocumentChunk],
graph_model: Type[BaseModel],
- ontology_adapter: OntologyResolver = None,
+    config: Optional[Config] = None,
custom_prompt: Optional[str] = None,
) -> List[DocumentChunk]:
"""
@@ -101,6 +126,24 @@ async def extract_graph_from_data(
if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
]
- return await integrate_chunk_graphs(
- data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
- )
+    # Use the resolver from the provided config; otherwise build one from the
+    # ontology env settings, or fall back to the default resolver.
+ if config is None:
+ ontology_config = get_ontology_env_config()
+ if (
+ ontology_config.ontology_file_path
+ and ontology_config.ontology_resolver
+ and ontology_config.matching_strategy
+ ):
+ config: Config = {
+ "ontology_config": {
+ "ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
+ }
+ }
+ else:
+ config: Config = {
+ "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
+ }
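+    # For reference, a sketch of the env-driven branch above; the variable
+    # names are assumed to mirror the ontology env config fields:
+    #   ONTOLOGY_RESOLVER=rdflib
+    #   MATCHING_STRATEGY=fuzzy
+    #   ONTOLOGY_FILE_PATH=/absolute/path/to/ontology.owl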
+
+ ontology_resolver = config["ontology_config"]["ontology_resolver"]
+
+ return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver)
diff --git a/cognee/tasks/graph/extract_graph_from_data_v2.py b/cognee/tasks/graph/extract_graph_from_data_v2.py
index c1f43df5c..8cea6602e 100644
--- a/cognee/tasks/graph/extract_graph_from_data_v2.py
+++ b/cognee/tasks/graph/extract_graph_from_data_v2.py
@@ -3,7 +3,7 @@ from typing import List
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
extract_content_nodes_and_relationship_names,
@@ -17,9 +17,21 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
async def extract_graph_from_data(
data_chunks: List[DocumentChunk],
n_rounds: int = 2,
- ontology_adapter: OntologyResolver = None,
+ ontology_adapter: BaseOntologyResolver = None,
) -> List[DocumentChunk]:
- """Extract and update graph data from document chunks in multiple steps."""
+ """Extract and update graph data from document chunks using cascade extraction.
+
+ This function performs multi-step graph extraction from document chunks,
+ using cascade extraction techniques to build comprehensive knowledge graphs.
+
+ Args:
+ data_chunks: List of document chunks to process
+ n_rounds: Number of extraction rounds to perform (default: 2)
+ ontology_adapter: Resolver for validating entities against ontology
+
+ Returns:
+ List of updated DocumentChunk objects with extracted graph data
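+
+    Example (illustrative sketch):
+        chunks = await extract_graph_from_data(data_chunks, n_rounds=3)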
+ """
chunk_nodes = await asyncio.gather(
*[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks]
)
@@ -44,5 +56,5 @@ async def extract_graph_from_data(
data_chunks=data_chunks,
chunk_graphs=chunk_graphs,
graph_model=KnowledgeGraph,
- ontology_adapter=ontology_adapter or OntologyResolver(),
+        # Fall back to the default resolver, since integrate_chunk_graphs rejects None.
+        ontology_adapter=ontology_adapter or get_default_ontology_resolver(),
)
diff --git a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py
index 8b406e53a..dfab79732 100644
--- a/cognee/tests/unit/modules/ontology/test_ontology_adapter.py
+++ b/cognee/tests/unit/modules/ontology/test_ontology_adapter.py
@@ -1,12 +1,14 @@
import pytest
from rdflib import Graph, Namespace, RDF, OWL, RDFS
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
def test_ontology_adapter_initialization_success():
- """Test successful initialization of OntologyAdapter."""
+ """Test successful initialization of RDFLibOntologyResolver from get_default_ontology_resolver."""
- adapter = OntologyResolver()
+ adapter = get_default_ontology_resolver()
adapter.build_lookup()
assert isinstance(adapter.lookup, dict)
@@ -14,7 +16,7 @@ def test_ontology_adapter_initialization_success():
def test_ontology_adapter_initialization_file_not_found():
"""Test OntologyAdapter initialization with nonexistent file."""
- adapter = OntologyResolver(ontology_file="nonexistent.owl")
+ adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl")
assert adapter.graph is None
@@ -27,7 +29,7 @@ def test_build_lookup():
g.add((ns.Audi, RDF.type, ns.Car))
- resolver = OntologyResolver()
+ resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@@ -50,7 +52,7 @@ def test_find_closest_match_exact():
g.add((ns.Car, RDF.type, OWL.Class))
g.add((ns.Audi, RDF.type, ns.Car))
- resolver = OntologyResolver()
+ resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@@ -71,7 +73,7 @@ def test_find_closest_match_fuzzy():
g.add((ns.Audi, RDF.type, ns.Car))
g.add((ns.BMW, RDF.type, ns.Car))
- resolver = OntologyResolver()
+ resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@@ -92,7 +94,7 @@ def test_find_closest_match_no_match():
g.add((ns.Audi, RDF.type, ns.Car))
g.add((ns.BMW, RDF.type, ns.Car))
- resolver = OntologyResolver()
+ resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@@ -102,10 +104,10 @@ def test_find_closest_match_no_match():
def test_get_subgraph_no_match_rdflib():
- """Test get_subgraph returns empty results for a non-existent node."""
+ """Test get_subgraph returns empty results for a non-existent node using RDFLibOntologyResolver."""
g = Graph()
- resolver = OntologyResolver()
+ resolver = get_default_ontology_resolver()
resolver.graph = g
resolver.build_lookup()
@@ -138,7 +140,7 @@ def test_get_subgraph_success_rdflib():
g.add((ns.VW, owns, ns.Audi))
g.add((ns.VW, owns, ns.Porsche))
- resolver = OntologyResolver()
+ resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@@ -160,10 +162,10 @@ def test_get_subgraph_success_rdflib():
def test_refresh_lookup_rdflib():
- """Test that refresh_lookup rebuilds the lookup dict into a new object."""
+ """Test that refresh_lookup rebuilds the lookup dict into a new object using RDFLibOntologyResolver."""
g = Graph()
- resolver = OntologyResolver()
+ resolver = get_default_ontology_resolver()
resolver.graph = g
resolver.build_lookup()
@@ -172,3 +174,318 @@ def test_refresh_lookup_rdflib():
resolver.refresh_lookup()
assert resolver.lookup is not original_lookup
+
+
+def test_fuzzy_matching_strategy_exact_match():
+ """Test FuzzyMatchingStrategy finds exact matches."""
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ strategy = FuzzyMatchingStrategy()
+ candidates = ["audi", "bmw", "mercedes"]
+
+ result = strategy.find_match("audi", candidates)
+ assert result == "audi"
+
+
+def test_fuzzy_matching_strategy_fuzzy_match():
+ """Test FuzzyMatchingStrategy finds fuzzy matches."""
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ strategy = FuzzyMatchingStrategy(cutoff=0.6)
+ candidates = ["audi", "bmw", "mercedes"]
+
+ result = strategy.find_match("audii", candidates)
+ assert result == "audi"
+
+
+def test_fuzzy_matching_strategy_no_match():
+ """Test FuzzyMatchingStrategy returns None when no match meets cutoff."""
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ strategy = FuzzyMatchingStrategy(cutoff=0.9)
+ candidates = ["audi", "bmw", "mercedes"]
+
+ result = strategy.find_match("completely_different", candidates)
+ assert result is None
+
+
+def test_fuzzy_matching_strategy_empty_candidates():
+ """Test FuzzyMatchingStrategy handles empty candidates list."""
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ strategy = FuzzyMatchingStrategy()
+
+ result = strategy.find_match("audi", [])
+ assert result is None
+
+
+def test_base_ontology_resolver_initialization():
+ """Test BaseOntologyResolver initialization with default matching strategy."""
+ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ class TestOntologyResolver(BaseOntologyResolver):
+ def build_lookup(self):
+ pass
+
+ def refresh_lookup(self):
+ pass
+
+ def find_closest_match(self, name, category):
+ return None
+
+ def get_subgraph(self, node_name, node_type="individuals", directed=True):
+ return [], [], None
+
+ resolver = TestOntologyResolver()
+ assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+
+
+def test_base_ontology_resolver_custom_matching_strategy():
+ """Test BaseOntologyResolver initialization with custom matching strategy."""
+ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+ from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+ class CustomMatchingStrategy(MatchingStrategy):
+ def find_match(self, name, candidates):
+ return "custom_match"
+
+ class TestOntologyResolver(BaseOntologyResolver):
+ def build_lookup(self):
+ pass
+
+ def refresh_lookup(self):
+ pass
+
+ def find_closest_match(self, name, category):
+ return None
+
+ def get_subgraph(self, node_name, node_type="individuals", directed=True):
+ return [], [], None
+
+ custom_strategy = CustomMatchingStrategy()
+ resolver = TestOntologyResolver(matching_strategy=custom_strategy)
+ assert resolver.matching_strategy == custom_strategy
+
+
+def test_ontology_config_structure():
+ """Test TypedDict structure for ontology configuration."""
+ from cognee.modules.ontology.ontology_config import Config
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ matching_strategy = FuzzyMatchingStrategy()
+ resolver = RDFLibOntologyResolver(matching_strategy=matching_strategy)
+
+ config: Config = {"ontology_config": {"ontology_resolver": resolver}}
+
+ assert config["ontology_config"]["ontology_resolver"] == resolver
+
+
+def test_get_default_ontology_resolver():
+ """Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy."""
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ resolver = get_default_ontology_resolver()
+
+ assert isinstance(resolver, RDFLibOntologyResolver)
+ assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+
+
+def test_rdflib_ontology_resolver_uses_matching_strategy():
+ """Test that RDFLibOntologyResolver uses the provided matching strategy."""
+ from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+ class TestMatchingStrategy(MatchingStrategy):
+ def find_match(self, name, candidates):
+ return "test_match" if candidates else None
+
+ ns = Namespace("http://example.org/test#")
+ g = Graph()
+ g.add((ns.Car, RDF.type, OWL.Class))
+ g.add((ns.Audi, RDF.type, ns.Car))
+
+ resolver = RDFLibOntologyResolver(matching_strategy=TestMatchingStrategy())
+ resolver.graph = g
+ resolver.build_lookup()
+
+ result = resolver.find_closest_match("Audi", "individuals")
+ assert result == "test_match"
+
+
+def test_rdflib_ontology_resolver_default_matching_strategy():
+ """Test that RDFLibOntologyResolver uses FuzzyMatchingStrategy by default."""
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ resolver = RDFLibOntologyResolver()
+ assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+
+
+def test_get_ontology_resolver_from_env_success():
+ """Test get_ontology_resolver_from_env returns correct resolver with valid parameters."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ resolver = get_ontology_resolver_from_env(
+ ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
+ )
+
+ assert isinstance(resolver, RDFLibOntologyResolver)
+ assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+ assert resolver.ontology_file == "/test/path.owl"
+
+
+def test_get_ontology_resolver_from_env_unsupported_resolver():
+ """Test get_ontology_resolver_from_env raises EnvironmentError for unsupported resolver."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ with pytest.raises(EnvironmentError) as exc_info:
+ get_ontology_resolver_from_env(
+ ontology_resolver="unsupported",
+ matching_strategy="fuzzy",
+ ontology_file_path="/test/path.owl",
+ )
+
+ assert "Unsupported ontology resolver: unsupported" in str(exc_info.value)
+ assert "Supported resolvers are: RdfLib with FuzzyMatchingStrategy" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_unsupported_strategy():
+ """Test get_ontology_resolver_from_env raises EnvironmentError for unsupported strategy."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ with pytest.raises(EnvironmentError) as exc_info:
+ get_ontology_resolver_from_env(
+ ontology_resolver="rdflib",
+ matching_strategy="unsupported",
+ ontology_file_path="/test/path.owl",
+ )
+
+ assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_empty_file_path():
+ """Test get_ontology_resolver_from_env raises EnvironmentError for empty file path."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ with pytest.raises(EnvironmentError) as exc_info:
+ get_ontology_resolver_from_env(
+ ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path=""
+ )
+
+ assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_none_file_path():
+ """Test get_ontology_resolver_from_env raises EnvironmentError for None file path."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ with pytest.raises(EnvironmentError) as exc_info:
+ get_ontology_resolver_from_env(
+ ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path=None
+ )
+
+ assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_empty_resolver():
+ """Test get_ontology_resolver_from_env raises EnvironmentError for empty resolver."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ with pytest.raises(EnvironmentError) as exc_info:
+ get_ontology_resolver_from_env(
+ ontology_resolver="", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
+ )
+
+ assert "Unsupported ontology resolver:" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_empty_strategy():
+ """Test get_ontology_resolver_from_env raises EnvironmentError for empty strategy."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ with pytest.raises(EnvironmentError) as exc_info:
+ get_ontology_resolver_from_env(
+ ontology_resolver="rdflib", matching_strategy="", ontology_file_path="/test/path.owl"
+ )
+
+ assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_default_parameters():
+ """Test get_ontology_resolver_from_env with default empty parameters raises EnvironmentError."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ with pytest.raises(EnvironmentError) as exc_info:
+ get_ontology_resolver_from_env()
+
+ assert "Unsupported ontology resolver:" in str(exc_info.value)
+
+
+def test_get_ontology_resolver_from_env_case_sensitivity():
+ """Test get_ontology_resolver_from_env is case sensitive."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ with pytest.raises(EnvironmentError):
+ get_ontology_resolver_from_env(
+ ontology_resolver="RDFLIB",
+ matching_strategy="fuzzy",
+ ontology_file_path="/test/path.owl",
+ )
+
+ with pytest.raises(EnvironmentError):
+ get_ontology_resolver_from_env(
+ ontology_resolver="RdfLib",
+ matching_strategy="fuzzy",
+ ontology_file_path="/test/path.owl",
+ )
+
+
+def test_get_ontology_resolver_from_env_with_actual_file():
+    """Test get_ontology_resolver_from_env with an explicit ontology file path (the file need not exist)."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+ from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
+
+ resolver = get_ontology_resolver_from_env(
+ ontology_resolver="rdflib",
+ matching_strategy="fuzzy",
+ ontology_file_path="/path/to/ontology.owl",
+ )
+
+ assert isinstance(resolver, RDFLibOntologyResolver)
+ assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
+ assert resolver.ontology_file == "/path/to/ontology.owl"
+
+
+def test_get_ontology_resolver_from_env_resolver_functionality():
+    """Test that a resolver created by get_ontology_resolver_from_env behaves correctly."""
+ from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
+
+ resolver = get_ontology_resolver_from_env(
+ ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
+ )
+
+ resolver.build_lookup()
+ assert isinstance(resolver.lookup, dict)
+
+ result = resolver.find_closest_match("test", "individuals")
+ assert result is None # Should return None for non-existent entity
+
+ nodes, relationships, start_node = resolver.get_subgraph("test", "individuals")
+ assert nodes == []
+ assert relationships == []
+ assert start_node is None
diff --git a/examples/python/ontology_demo_example.py b/examples/python/ontology_demo_example.py
index 8243faef5..5b18e6ed4 100644
--- a/examples/python/ontology_demo_example.py
+++ b/examples/python/ontology_demo_example.py
@@ -5,6 +5,8 @@ import cognee
from cognee.api.v1.search import SearchType
from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.shared.logging_utils import setup_logging
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.ontology_config import Config
text_1 = """
1. Audi
@@ -60,7 +62,14 @@ async def main():
os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl"
)
- await cognee.cognify(ontology_file_path=ontology_path)
+    # Build the full Config structure manually with an explicit RDFLib resolver
+ config: Config = {
+ "ontology_config": {
+ "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
+ }
+ }
+
+ await cognee.cognify(config=config)
print("Knowledge with ontology created.")
# Step 4: Query insights
diff --git a/examples/python/ontology_demo_example_2.py b/examples/python/ontology_demo_example_2.py
index 22fb19862..01bcd9ae4 100644
--- a/examples/python/ontology_demo_example_2.py
+++ b/examples/python/ontology_demo_example_2.py
@@ -5,6 +5,8 @@ import os
import textwrap
from cognee.api.v1.search import SearchType
from cognee.api.v1.visualize.visualize import visualize_graph
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
+from cognee.modules.ontology.ontology_config import Config
async def run_pipeline(ontology_path=None):
@@ -17,7 +19,13 @@ async def run_pipeline(ontology_path=None):
await cognee.add(scientific_papers_dir)
- pipeline_run = await cognee.cognify(ontology_file_path=ontology_path)
+ config: Config = {
+ "ontology_config": {
+ "ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
+ }
+ }
+
+ pipeline_run = await cognee.cognify(config=config)
return pipeline_run
diff --git a/notebooks/ontology_demo.ipynb b/notebooks/ontology_demo.ipynb
index e48d8467d..ef4a046b8 100644
--- a/notebooks/ontology_demo.ipynb
+++ b/notebooks/ontology_demo.ipynb
@@ -36,45 +36,33 @@
},
{
"cell_type": "code",
- "execution_count": 1,
"id": "8cf7ba29f9a150af",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-03-26T16:17:55.937140Z",
- "start_time": "2025-03-26T16:17:55.908542Z"
- }
- },
- "outputs": [],
+ "metadata": {},
"source": [
"# Install required package\n",
"# !pip install cognee"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "code",
- "execution_count": null,
"id": "abb86851",
"metadata": {},
- "outputs": [],
"source": [
"import os\n",
"\n",
"# Set up OpenAI API key (required for Cognee's LLM functionality)\n",
"if \"LLM_API_KEY\" not in os.environ:\n",
" os.environ[\"LLM_API_KEY\"] = \"your-api-key-here\" # Replace with your API key"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "code",
- "execution_count": null,
"id": "d825d126b3a0ec26",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-03-26T16:18:09.382400Z",
- "start_time": "2025-03-26T16:18:09.342349Z"
- }
- },
- "outputs": [],
+ "metadata": {},
"source": [
"# Import required libraries\n",
"import cognee\n",
@@ -85,7 +73,9 @@
"from cognee.api.v1.search import SearchType\n",
"\n",
"logger = get_logger()"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -102,17 +92,10 @@
},
{
"cell_type": "code",
- "execution_count": 13,
"id": "4d0e4a58e4207a7d",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-04-09T17:12:54.006718Z",
- "start_time": "2025-04-09T17:12:53.992906Z"
- }
- },
- "outputs": [],
+ "metadata": {},
"source": [
- "async def run_pipeline(ontology_path=None):\n",
+ "async def run_pipeline(config=None):\n",
" # Clean existing data\n",
" await cognee.prune.prune_data()\n",
" await cognee.prune.prune_system(metadata=True)\n",
@@ -130,7 +113,7 @@
" await cognee.add(scientific_papers_dir)\n",
" \n",
" # Cognify with optional ontology\n",
- " return await cognee.cognify(ontology_file_path=ontology_path)\n",
+ " return await cognee.cognify(config=config)\n",
"\n",
"async def query_pipeline(questions):\n",
" answers = []\n",
@@ -141,7 +124,9 @@
" )\n",
" answers.append(search_results)\n",
" return answers"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -155,423 +140,11 @@
},
{
"cell_type": "code",
- "execution_count": 14,
"id": "1363772d2b48f5c0",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-04-09T17:14:31.818452Z",
- "start_time": "2025-04-09T17:12:55.491598Z"
- }
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\u001b[2m2025-08-27T13:55:36.031761\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDeleted Kuzu database files at /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases/cognee_graph_kuzu\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "--- Results WITH ontology ---\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\u001b[2m2025-08-27T13:55:36.330304\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDatabase deleted successfully.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:36.521821\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `88a655ee-2a8f-5e47-90b4-ccc5aee28ee5`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:36.683661\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "User a6d0292a-e5d5-4087-a06d-e6e40c92ddbd has registered.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\u001b[2m2025-08-27T13:55:36.852839\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:37.022061\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReading PDF: /Users/daulet/Desktop/dev/cognee/examples/data/scientific_papers/nutrients-13-01241.pdf\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.external.pypdf_loader\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:37.159853\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:37.317975\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:37.464301\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `88a655ee-2a8f-5e47-90b4-ccc5aee28ee5`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:37.631226\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `88a655ee-2a8f-5e47-90b4-ccc5aee28ee5`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:37.806056\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:37.952328\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:38.123930\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReading PDF: /Users/daulet/Desktop/dev/cognee/examples/data/scientific_papers/TOJ-22-0073_152Mendoza.pdf\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.external.pypdf_loader\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:38.230010\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:38.400266\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:38.544525\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `88a655ee-2a8f-5e47-90b4-ccc5aee28ee5`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:38.712540\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOntology file 'examples/python/ontology_input_example/enriched_medical_ontology_with_classes.owl' not found. No owl ontology will be attached to the graph.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:38.726158\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `3a6c74ba-93cd-56db-a1c5-9c48aa366dc5`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:38.871531\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:39.018586\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:39.179788\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:55:39.369582\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:55:39 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:55:39 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:55:39 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.658679\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'study' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.660483\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coffee consumption study' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.660854\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.661218\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'laura torres-collado' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
-    "\u001b[2m2025-08-27T13:56:11.661612\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'laura maría compañ-gabucio' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.661903\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'sandra gonzález-palacios' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.662160\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'leyre notario-barandiaran' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.662469\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'alejandro oncina-cánovas' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.662774\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'jesús vioque' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
-    "\u001b[2m2025-08-27T13:56:11.663026\u001b[0m [\u001b[32m\u001b[1minfo     \u001b[0m] \u001b[1mNo close match found for 'manuela garcía-de la hera' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.663391\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'compound' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.663658\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'caffeine' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.663932\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'beverage' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.664183\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'decaffeinated coffee' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.664460\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'disease' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.664706\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cardiovascular disease' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.665024\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cancer' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.665327\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'outcome' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.665566\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'all-cause mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.665804\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'database' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.666041\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'national death index' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.666342\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'valencia nutrition study' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.666631\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'diet' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.666882\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'mediterranean diet' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.667152\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'duration' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.667354\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'follow-up period' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.667602\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'fda 2021 study' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.667963\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'valencia nutrition survey' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.668205\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health condition' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.668473\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cardiovascular disease' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.668722\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'behavior' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.668963\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coffee consumption' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.669206\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health outcome' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.669438\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'all-cause mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.669706\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'caffeinated coffee' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.669937\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'period' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.670145\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for '18 years follow up' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.670439\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.670862\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'moderate consumption' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.671122\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.671349\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chemical' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.671712\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chronic illness' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.671900\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'demographic' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.672117\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'elderly population' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.672357\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'spanish population' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.672771\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'nutritional survey' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:11.672995\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'study authors' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:14.617820\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:56:14 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:56:14 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:56:14 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:20.416149\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:24.327046\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:24.483512\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:24.634531\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:24.800450\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:24.959932\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:25.116106\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:25.281875\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `3a6c74ba-93cd-56db-a1c5-9c48aa366dc5`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:25.429555\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `3a6c74ba-93cd-56db-a1c5-9c48aa366dc5`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:25.588955\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:25.744193\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:25.909248\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:26.099711\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:56:26 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:56:26 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.017564\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.020412\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'michael f. mendoza' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.021095\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'ralf martz sulague' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.021558\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'therese posas-mendoza' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.022129\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'carl j. lavie' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.022674\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'beverage' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.023245\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coffee' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.023721\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health_condition' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.024235\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cardiovascular health' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.024721\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hypertension' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.025227\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'heart failure' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.025607\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'atrial fibrillation' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.026034\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coronary heart disease' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.026407\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'nutrient' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.026835\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'dietary nutrient' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.027189\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'action' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.027578\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coffee consumption' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.027937\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chemical' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.028310\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'caffeine' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.029010\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'diterpenes' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.029272\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'phenolic acid' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.029707\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'date' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.030169\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for '2023-01-01' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.030506\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for '2000-2021' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.031084\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'medical condition' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.031452\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'atrial fibrillation' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.031900\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cardiovascular disease' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.032243\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'research methodology' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.032714\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'meta-analysis' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.033041\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chemical compounds' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.033366\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'antioxidants' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.033701\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'medical topic' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.034065\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.034340\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.034758\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health benefits' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.035022\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chemical compound' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.035511\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chlorogenic acid' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.035929\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'ferulic acid' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.036169\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chemical element' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.036785\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'magnesium' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.037181\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'trigonelline' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:48.038230\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'p-coumaric acid' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:51.414844\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:56:51 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:56:51 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:56:56.891985\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:01.419876\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:01.558475\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:01.702641\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:01.871728\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:02.026804\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:02.183571\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:02.344790\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `3a6c74ba-93cd-56db-a1c5-9c48aa366dc5`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:02.688185\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph projection completed: 93 nodes, 194 edges in 0.01s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mCogneeGraph\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:03.214983\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mVector collection retrieval completed: Retrieved distances from 6 collections in 0.04s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:57:03 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:06.496514\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph projection completed: 93 nodes, 194 edges in 0.01s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mCogneeGraph\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:06.962219\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mVector collection retrieval completed: Retrieved distances from 6 collections in 0.02s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:57:07 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:12.400015\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph projection completed: 93 nodes, 194 edges in 0.01s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mCogneeGraph\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:12.800274\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mVector collection retrieval completed: Retrieved distances from 6 collections in 0.01s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:57:12 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:15.801364\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph projection completed: 93 nodes, 194 edges in 0.01s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mCogneeGraph\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:16.301946\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mVector collection retrieval completed: Retrieved distances from 6 collections in 0.02s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:57:16 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Q: What are common risk factors for Type 2 Diabetes?\n",
- "A: ['Common risk factors for Type 2 Diabetes include:\\n1. Obesity, particularly with a high body mass index (BMI).\\n2. Physical inactivity or low levels of exercise.\\n3. Unhealthy diet, particularly high in sugar and fats.\\n4. Family history of diabetes.\\n5. Age, especially being over 45 years old.\\n6. High blood pressure or hypertension.\\n7. High cholesterol levels.\\n8. History of gestational diabetes or giving birth to a baby over 9 lbs.\\n9. Ethnicity, with higher risk in certain populations (e.g., African American, Hispanic).\\n10. Insulin resistance or metabolic syndrome.']\n",
- "\n",
- "Q: What preventive measures reduce the risk of Hypertension?\n",
- "A: ['Preventive measures that reduce the risk of hypertension include moderate coffee consumption, which has been associated with a decreased risk of developing hypertension, heart failure, and atrial fibrillation. Additionally, adjustments in lifestyle factors, such as avoiding excessive coffee consumption, especially boiled or unfiltered varieties, which can raise cholesterol levels, can further help lower hypertension risk.']\n",
- "\n",
- "Q: What symptoms indicate possible Cardiovascular Disease?\n",
- "A: ['Symptoms that indicate possible Cardiovascular Disease (CVD) may include, but are not limited to, chest pain, shortness of breath, fatigue, dizziness, and palpitations. Additionally, factors such as high blood pressure, high cholesterol, diabetes, and obesity can also be signs of increased risk for CVD.']\n",
- "\n",
- "Q: What diseases are associated with Obesity?\n",
- "A: ['Diseases associated with obesity include cardiovascular disease, cancer, and diabetes. Obesity can exacerbate these conditions and increase the risk of their occurrence.']\n",
- "\n"
- ]
- }
- ],
+ "metadata": {},
"source": [
+ "from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver\n",
+ "from cognee.modules.ontology.ontology_config import Config\n",
"# Test questions\n",
"questions = [\n",
" \"What are common risk factors for Type 2 Diabetes?\",\n",
@@ -581,450 +154,28 @@
"]\n",
"\n",
"# Path to medical ontology\n",
- "ontology_path = \"examples/python/ontology_input_example/enriched_medical_ontology_with_classes.owl\" # Update with your ontology path\n",
+ "ontology_path = \"../examples/python/ontology_input_example/enriched_medical_ontology_with_classes.owl\" # Update with your ontology path\n",
+ "\n",
+ "config: Config = {\n",
+ " \"ontology_config\": {\n",
+ " \"ontology_resolver\": RDFLibOntologyResolver(ontology_file=ontology_path)\n",
+ " }\n",
+ " }\n",
"\n",
"# Run with ontology\n",
"print(\"\\n--- Results WITH ontology ---\\n\")\n",
- "await run_pipeline(ontology_path=ontology_path)\n",
+ "await run_pipeline(config=config)\n",
"answers_with = await query_pipeline(questions)\n",
"for q, a in zip(questions, answers_with):\n",
" print(f\"Q: {q}\\nA: {a}\\n\")"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "code",
- "execution_count": 15,
"id": "3aa18f4cdd5ceff6",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-04-09T14:32:24.891560Z",
- "start_time": "2025-04-09T14:30:47.863808Z"
- }
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\u001b[2m2025-08-27T13:57:25.168873\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDeleted Kuzu database files at /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases/cognee_graph_kuzu\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:25.266675\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDatabase deleted successfully.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "--- Results WITHOUT ontology ---\n",
- "\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\u001b[2m2025-08-27T13:57:25.420598\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `f4f6b83c-3555-5296-a812-107346770fbd`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:25.561245\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "User e0763f65-1749-42aa-8436-22b776b42bcf has registered.\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\u001b[2m2025-08-27T13:57:25.702415\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:25.864741\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReading PDF: /Users/daulet/Desktop/dev/cognee/examples/data/scientific_papers/nutrients-13-01241.pdf\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.external.pypdf_loader\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:26.013387\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:26.153761\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:26.292634\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `f4f6b83c-3555-5296-a812-107346770fbd`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:26.458350\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `f4f6b83c-3555-5296-a812-107346770fbd`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:26.606166\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:26.761724\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:26.945575\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReading PDF: /Users/daulet/Desktop/dev/cognee/examples/data/scientific_papers/TOJ-22-0073_152Mendoza.pdf\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.external.pypdf_loader\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:27.052394\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:27.201593\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:27.360901\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `f4f6b83c-3555-5296-a812-107346770fbd`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:27.532808\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOntology file 'None' not found. No owl ontology will be attached to the graph.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:27.561598\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `05779e2b-4ff1-5b13-8fc4-7fd789498ec4`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:27.722699\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:27.871031\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:28.023426\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:28.206266\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:57:28 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:57:28 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:57:28 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.514070\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'study' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.515588\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coffee consumption study' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.516028\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.516456\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'laura torres-collado' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.516897\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'laura marÃa compañ-gabucio' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.517277\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'sandra gonzález-palacios' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.517597\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'leyre notario-barandiaran' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.518313\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'alejandro oncina-cánovas' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.518865\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'jesús vioque' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.519339\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'manuela garcÃa-de la hera' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.519682\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'valencia nutrition study' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.520013\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'diet' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.520298\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'mediterranean diet' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.520547\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'date' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.520885\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'date study received' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.521219\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'date study accepted' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.521484\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'date study published' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.521798\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'mortality' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.522131\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cvd mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.522432\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cancer mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.522689\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'all-cause mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.523071\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health behavior' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.523338\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coffee consumption' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.523618\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'valencia nutrition survey' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.523894\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health outcome' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.524143\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.524424\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health condition' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.524691\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cardiovascular diseases' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.524970\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cancer' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.525248\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'beverage' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.525530\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'caffeinated coffee' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.525831\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'decaffeinated coffee' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.526090\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'statistical measure' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.526348\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hazard ratio' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.526990\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'time unit' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.527685\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person-years' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.528244\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'time period' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.528578\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'study duration' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.528876\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cumulative incidence' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.529258\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.529535\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'adult life' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.529802\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chronic illness' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.530362\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'all-cause mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.530650\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'longitudinal studies' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.530924\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'mediterranean lifestyle' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.531196\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'research study' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.531491\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'self-reported data' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.531710\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'spanish population' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.531986\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'sample size' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.532239\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'response bias' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.532464\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'funding sources' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.532711\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'ethical approval' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:49.532977\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'informed consent' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:52.613755\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:57:52 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:57:52 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:57:52 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:57:57.853670\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:03.401474\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:03.562616\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:03.705987\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:03.855674\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:04.012884\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:04.169312\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:04.474564\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `05779e2b-4ff1-5b13-8fc4-7fd789498ec4`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:04.632222\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `05779e2b-4ff1-5b13-8fc4-7fd789498ec4`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:04.779116\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:04.932294\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:05.105238\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:05.297050\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:58:05 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:58:05 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.909979\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.912597\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'michael f. mendoza' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.913179\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'ralf martz sulague' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.913706\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'therese posas-mendoza' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.914114\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'carl j. lavie' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.914529\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'beverage' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.914920\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coffee' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.915255\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health domain' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.915646\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cardiovascular health' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.916079\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'disease' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.916464\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hypertension' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.916802\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'substance' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.917174\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cholesterol' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.917500\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'atrial fibrillation' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.917880\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'coronary heart disease' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.918233\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'phenolic acid' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.918754\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'diterpenes' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.919133\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'gene' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.919446\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cyp1a2' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.919786\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health advisory' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.920133\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'caffeine consumption during pregnancy' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.920442\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'caffeine' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.920789\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'myocardial infarction' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.921121\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'heart failure' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.921517\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chemical compounds' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.921853\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'antioxidants' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.922179\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'medical condition' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.922542\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cardiovascular disease' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.922792\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'medical procedure' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.923044\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'cardiac surgery' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.923349\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'health metric' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.923688\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'mortality' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.923894\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'research' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.924157\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'ali-hassan-sayegh et al (2014)' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.924444\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'ding et al (2015)' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.924734\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'lopez-garcia et al (2008)' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.925225\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'de koning gans et al (2010)' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.925735\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'andersen et al (2006)' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.926028\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'kleemola et al (2000)' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.926485\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'kim et al (2019)' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.926770\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'chemical compound' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.927582\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'blood pressure' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.927856\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:29.928277\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'moderation' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:33.804451\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:58:33 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\u001b[92m14:58:33 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:40.232682\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:44.523716\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:44.668235\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:44.816078\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:44.967879\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:45.126329\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:45.296802\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:45.447048\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `05779e2b-4ff1-5b13-8fc4-7fd789498ec4`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:45.774857\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph projection completed: 102 nodes, 214 edges in 0.00s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mCogneeGraph\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:46.269205\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mVector collection retrieval completed: Retrieved distances from 6 collections in 0.02s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:58:46 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:49.120648\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph projection completed: 102 nodes, 214 edges in 0.00s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mCogneeGraph\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:49.625746\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mVector collection retrieval completed: Retrieved distances from 6 collections in 0.01s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:58:49 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:53.119208\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph projection completed: 102 nodes, 214 edges in 0.01s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mCogneeGraph\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:53.576759\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mVector collection retrieval completed: Retrieved distances from 6 collections in 0.02s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:58:53 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:56.395448\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph projection completed: 102 nodes, 214 edges in 0.01s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mCogneeGraph\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:56.961329\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mVector collection retrieval completed: Retrieved distances from 6 collections in 0.02s\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\u001b[92m14:58:57 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\n",
- "\n",
- "\u001b[1m\n",
- "LiteLLM completion() model= gpt-4o-mini; provider = openai\u001b[0m\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Q: What are common risk factors for Type 2 Diabetes?\n",
- "A: ['Common risk factors for Type 2 Diabetes include:\\n1. High body mass index (BMI) \\n2. Physical inactivity \\n3. Poor diet (such as low adherence to a Mediterranean diet)\\n4. Smoking\\n5. Age (increased risk with older age)\\n6. Family history of diabetes\\n7. High blood pressure\\n8. High blood cholesterol\\n9. Waist circumference (indicating abdominal obesity)\\n10. Presence of chronic diseases (e.g., cardiovascular diseases, hypertension)']\n",
- "\n",
- "Q: What preventive measures reduce the risk of Hypertension?\n",
- "A: ['Preventive measures to reduce the risk of hypertension include:\\n1. **Moderate Coffee Consumption**: Studies suggest that moderate coffee intake may lower the risk of developing hypertension.\\n2. **Diet and Lifestyle**: Adopting a healthy diet, particularly one resembling the Mediterranean diet, and maintaining a healthy lifestyle plays a crucial role in cardiovascular health.\\n3. **Managing Genetics and Smoking Status**: Outcomes of coffee consumption on blood pressure may vary based on genetic factors, especially those related to caffeine metabolism, and smoking habits should be considered.']\n",
- "\n",
- "Q: What symptoms indicate possible Cardiovascular Disease?\n",
- "A: ['Symptoms indicating possible cardiovascular disease may include: \\n- Chest pain or discomfort (angina) \\n- Shortness of breath \\n- Fatigue or weakness \\n- Palpitations or irregular heartbeat \\n- Dizziness or fainting \\n- Swelling in the legs, ankles, or feet \\n- Pain or numbness in the arms or legs. \\nFurther evaluation by a healthcare provider is essential for accurate diagnosis and treatment.']\n",
- "\n",
- "Q: What diseases are associated with Obesity?\n",
- "A: ['Diseases associated with obesity include cardiovascular diseases, hypertension, and type 2 diabetes. Obesity is linked with an increased risk for these conditions, which can lead to complications in heart health and overall mortality.']\n",
- "\n"
- ]
- }
- ],
+ "metadata": {},
"source": [
"# Run without ontology\n",
"print(\"\\n--- Results WITHOUT ontology ---\\n\")\n",
@@ -1032,7 +183,9 @@
"answers_without = await query_pipeline(questions)\n",
"for q, a in zip(questions, answers_without):\n",
" print(f\"Q: {q}\\nA: {a}\\n\")"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -1046,45 +199,8 @@
},
{
"cell_type": "code",
- "execution_count": 16,
"id": "36ee2a360f47a054",
- "metadata": {
- "ExecuteTime": {
- "end_time": "2025-04-09T15:25:33.512697Z",
- "start_time": "2025-04-09T15:25:33.471854Z"
- }
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n",
- "\u001b[2m2025-08-27T13:58:58.679995\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mGraph visualization saved as /Users/daulet/graph_visualization.html\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
- "\n",
- "\u001b[2m2025-08-27T13:58:58.682148\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mThe HTML file has been stored on your home directory! Navigate there with cd ~\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "'/Users/daulet/graph_visualization.html'"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "metadata": {},
"source": [
"import webbrowser\n",
"import os\n",
@@ -1094,7 +210,9 @@
"html_file = os.path.join(home_dir, \"graph_visualization.html\")\n",
"display(html_file)\n",
"webbrowser.open(f\"file://{html_file}\")"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
},
{
"cell_type": "markdown",
@@ -1132,22 +250,8 @@
},
{
"cell_type": "code",
- "execution_count": null,
"id": "8d2a0fe555a7bc0f",
"metadata": {},
- "outputs": [
- {
- "ename": "",
- "evalue": "",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
- "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
- "\u001b[1;31mClick here for more info. \n",
- "\u001b[1;31mView Jupyter log for further details."
- ]
- }
- ],
"source": [
"# Only exit in interactive mode, not during GitHub Actions\n",
"import os\n",
@@ -1158,7 +262,17 @@
" os._exit(0)\n",
"else:\n",
" print(\"Skipping kernel exit - running in GitHub Actions\")"
- ]
+ ],
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "cell_type": "code",
+ "source": "",
+ "id": "adb6601890237b6a",
+ "outputs": [],
+ "execution_count": null
}
],
"metadata": {
diff --git a/pyproject.toml b/pyproject.toml
index 01681c2ae..c9c67a870 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[project]
name = "cognee"
-version = "0.3.4.dev4"
+version = "0.3.4"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = [
{ name = "Vasilije Markovic" },
@@ -64,14 +64,13 @@ dependencies = [
"pylance>=0.22.0,<1.0.0",
"kuzu (==0.11.0)",
"python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
-]
-
-[project.optional-dependencies]
-api = [
"uvicorn>=0.34.0,<1.0.0",
"gunicorn>=20.1.0,<24",
"websockets>=15.0.1,<16.0.0"
]
+
+[project.optional-dependencies]
+api = []
distributed = [
"modal>=1.0.5,<2.0.0",
]
diff --git a/uv.lock b/uv.lock
index ddc19cab0..7a26ee96a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -852,7 +852,7 @@ wheels = [
[[package]]
name = "cognee"
-version = "0.3.4.dev4"
+version = "0.3.4"
source = { editable = "." }
dependencies = [
{ name = "aiofiles" },
@@ -864,6 +864,7 @@ dependencies = [
{ name = "fastapi" },
{ name = "fastapi-users", extra = ["sqlalchemy"] },
{ name = "filetype" },
+ { name = "gunicorn" },
{ name = "instructor" },
{ name = "jinja2" },
{ name = "kuzu" },
@@ -899,17 +900,14 @@ dependencies = [
{ name = "tiktoken" },
{ name = "typing-extensions" },
{ name = "unstructured", extra = ["pdf"] },
+ { name = "uvicorn" },
+ { name = "websockets" },
]
[package.optional-dependencies]
anthropic = [
{ name = "anthropic" },
]
-api = [
- { name = "gunicorn" },
- { name = "uvicorn" },
- { name = "websockets" },
-]
aws = [
{ name = "s3fs", extra = ["boto3"] },
]
@@ -1036,7 +1034,7 @@ requires-dist = [
{ name = "google-generativeai", marker = "extra == 'gemini'", specifier = ">=0.8.4,<0.9" },
{ name = "graphiti-core", marker = "extra == 'graphiti'", specifier = ">=0.7.0,<0.8" },
{ name = "groq", marker = "extra == 'groq'", specifier = ">=0.8.0,<1.0.0" },
- { name = "gunicorn", marker = "extra == 'api'", specifier = ">=20.1.0,<24" },
+ { name = "gunicorn", specifier = ">=20.1.0,<24" },
{ name = "instructor", specifier = ">=1.9.1,<2.0.0" },
{ name = "jinja2", specifier = ">=3.1.3,<4" },
{ name = "kuzu", specifier = "==0.11.0" },
@@ -1105,8 +1103,8 @@ requires-dist = [
{ name = "typing-extensions", specifier = ">=4.12.2,<5.0.0" },
{ name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" },
{ name = "unstructured", extras = ["pdf"], specifier = ">=0.18.1,<19" },
- { name = "uvicorn", marker = "extra == 'api'", specifier = ">=0.34.0,<1.0.0" },
- { name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1,<16.0.0" },
+ { name = "uvicorn", specifier = ">=0.34.0,<1.0.0" },
+ { name = "websockets", specifier = ">=15.0.1,<16.0.0" },
]
provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]
@@ -4706,7 +4704,7 @@ name = "nvidia-cudnn-cu12"
version = "9.10.2.21"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" },
+ { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
@@ -4717,7 +4715,7 @@ name = "nvidia-cufft-cu12"
version = "11.3.3.83"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
+ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
@@ -4744,9 +4742,9 @@ name = "nvidia-cusolver-cu12"
version = "11.7.3.90"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" },
- { name = "nvidia-cusparse-cu12", marker = "sys_platform != 'emscripten'" },
- { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
+ { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
+ { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
+ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
@@ -4757,7 +4755,7 @@ name = "nvidia-cusparse-cu12"
version = "12.5.8.93"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
+ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
@@ -8204,7 +8202,7 @@ name = "triton"
version = "3.4.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "setuptools", marker = "sys_platform != 'emscripten'" },
+ { name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" },