Merge branch 'dev' into feat/add-pdfproloader

Signed-off-by: EricXiao <taoiaox@gmail.com>
This commit is contained in:
EricXiao 2025-09-20 17:26:03 +08:00
commit d12ec0bc4f
56 changed files with 1303 additions and 1126 deletions

View file

@ -116,7 +116,15 @@ VECTOR_DB_PROVIDER="lancedb"
VECTOR_DB_URL=
VECTOR_DB_KEY=
################################################################################
# 🧩 Ontology resolver settings
################################################################################
# -- Ontology resolver params --------------------------------------
# ONTOLOGY_RESOLVER=rdflib # Default: uses rdflib and owl file to read ontology structures
# MATCHING_STRATEGY=fuzzy # Default: uses fuzzy matching with 80% similarity threshold
# ONTOLOGY_FILE_PATH=YOUR_FULL_FILE_PATH # Default: empty
# To add an ontology resolver, either configure it as shown in ontology_example or set the full file path and settings as environment variables.
################################################################################
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS

View file

@ -10,7 +10,7 @@ WORKFLOWS=(
"test_kuzu.yml"
"test_multimetric_qa_eval_run.yaml"
"test_graphrag_vs_rag_notebook.yml"
"test_gemini.yml"
"test_llms.yml"
"test_multimedia_example.yaml"
"test_deduplication.yml"
"test_eval_framework.yml"

View file

@ -1,29 +0,0 @@
name: test | gemini
on:
workflow_call:
jobs:
test-gemini:
name: Run Gemini Test
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
- name: Run Gemini Simple Example
env:
LLM_PROVIDER: "gemini"
LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
LLM_MODEL: "gemini/gemini-1.5-flash"
EMBEDDING_PROVIDER: "gemini"
EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
EMBEDDING_MODEL: "gemini/text-embedding-004"
EMBEDDING_DIMENSIONS: "768"
EMBEDDING_MAX_TOKENS: "8076"
run: uv run python ./examples/python/simple_example.py

86
.github/workflows/test_llms.yml vendored Normal file
View file

@ -0,0 +1,86 @@
name: LLM Test Suites

permissions:
  contents: read

on:
  workflow_call:

env:
  RUNTIME__LOG_LEVEL: ERROR
  ENV: 'dev'

jobs:
  # Gemini as both LLM and embedding provider.
  test-gemini:
    name: Run Gemini Test
    runs-on: ubuntu-22.04
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: '3.11.x'

      - name: Run Gemini Simple Example
        env:
          LLM_PROVIDER: "gemini"
          LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          LLM_MODEL: "gemini/gemini-1.5-flash"
          EMBEDDING_PROVIDER: "gemini"
          EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          EMBEDDING_MODEL: "gemini/text-embedding-004"
          EMBEDDING_DIMENSIONS: "768"
          EMBEDDING_MAX_TOKENS: "8076"
        run: uv run python ./examples/python/simple_example.py

  # OpenAI-compatible LLM with local fastembed embeddings.
  test-fastembed:
    name: Run Fastembed Test
    runs-on: ubuntu-22.04
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: '3.11.x'

      - name: Run Fastembed Simple Example
        env:
          LLM_PROVIDER: "openai"
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_PROVIDER: "fastembed"
          EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
          EMBEDDING_DIMENSIONS: "384"
          EMBEDDING_MAX_TOKENS: "256"
        run: uv run python ./examples/python/simple_example.py

  # OpenRouter via the "custom" provider with OpenAI embeddings.
  test-openrouter:
    name: Run OpenRouter Test
    runs-on: ubuntu-22.04
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: '3.11.x'

      - name: Run OpenRouter Simple Example
        env:
          LLM_PROVIDER: "custom"
          LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          LLM_MODEL: "openrouter/x-ai/grok-code-fast-1"
          LLM_ENDPOINT: "https://openrouter.ai/api/v1"
          EMBEDDING_PROVIDER: "openai"
          EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          EMBEDDING_MODEL: "openai/text-embedding-3-large"
          EMBEDDING_DIMENSIONS: "3072"
          EMBEDDING_MAX_TOKENS: "8191"
        run: uv run python ./examples/python/simple_example.py

View file

@ -1,30 +0,0 @@
name: test | openrouter
on:
workflow_call:
jobs:
test-openrouter:
name: Run OpenRouter Test
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
- name: Run OpenRouter Simple Example
env:
LLM_PROVIDER: "custom"
LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
LLM_MODEL: "openrouter/x-ai/grok-code-fast-1"
LLM_ENDPOINT: "https://openrouter.ai/api/v1"
EMBEDDING_PROVIDER: "openai"
EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }}
EMBEDDING_MODEL: "openai/text-embedding-3-large"
EMBEDDING_DIMENSIONS: "3072"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py

View file

@ -115,16 +115,10 @@ jobs:
secrets: inherit
# Additional LLM tests
gemini-tests:
name: Gemini Tests
needs: [basic-tests, e2e-tests]
uses: ./.github/workflows/test_gemini.yml
secrets: inherit
openrouter-tests:
name: OpenRouter Tests
needs: [basic-tests, e2e-tests]
uses: ./.github/workflows/test_openrouter.yml
llm-tests:
name: LLM Test Suite
needs: [ basic-tests, e2e-tests ]
uses: ./.github/workflows/test_llms.yml
secrets: inherit
# Ollama tests moved to the end
@ -138,8 +132,7 @@ jobs:
different-operating-systems-tests,
vector-db-tests,
example-tests,
gemini-tests,
openrouter-tests,
llm-tests,
mcp-test,
relational-db-migration-tests,
docker-compose-test,
@ -161,8 +154,7 @@ jobs:
example-tests,
db-examples-tests,
mcp-test,
gemini-tests,
openrouter-tests,
llm-tests,
ollama-tests,
relational-db-migration-tests,
docker-compose-test,
@ -183,8 +175,7 @@ jobs:
"${{ needs.example-tests.result }}" == "success" &&
"${{ needs.db-examples-tests.result }}" == "success" &&
"${{ needs.relational-db-migration-tests.result }}" == "success" &&
"${{ needs.gemini-tests.result }}" == "success" &&
"${{ needs.openrouter-tests.result }}" == "success" &&
"${{ needs.llm-tests.result }}" == "success" &&
"${{ needs.docker-compose-test.result }}" == "success" &&
"${{ needs.docker-ci-test.result }}" == "success" &&
"${{ needs.ollama-tests.result }}" == "success" ]]; then

View file

@ -176,16 +176,6 @@ You can also cognify your files and query using cognee UI.
<img src="assets/cognee-new-ui.webp" width="100%" alt="Cognee UI 2"></a>
### Installation for UI
To use the cognee UI with full functionality, you need to install cognee with API dependencies:
```bash
pip install 'cognee[api]'
```
The UI requires backend server functionality (uvicorn and other API dependencies) which are not included in the default cognee installation to keep it lightweight.
### Running the UI
Try cognee UI by running ``` cognee-cli -ui ``` command on your terminal.

View file

@ -3,6 +3,7 @@ from pydantic import BaseModel
from typing import Union, Optional
from uuid import UUID
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.shared.logging_utils import get_logger
from cognee.shared.data_models import KnowledgeGraph
from cognee.infrastructure.llm import get_max_chunk_tokens
@ -10,7 +11,11 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.chunking.TextChunker import TextChunker
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.get_default_ontology_resolver import (
get_default_ontology_resolver,
get_ontology_resolver_from_env,
)
from cognee.modules.users.models import User
from cognee.tasks.documents import (
@ -39,7 +44,7 @@ async def cognify(
graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker,
chunk_size: int = None,
ontology_file_path: Optional[str] = None,
config: Config = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
run_in_background: bool = False,
@ -100,8 +105,6 @@ async def cognify(
Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
Default limits: ~512-8192 tokens depending on models.
Smaller chunks = more granular but potentially fragmented knowledge.
ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
Useful for specialized fields like medical or legal documents.
vector_db_config: Custom vector database configuration for embeddings storage.
graph_db_config: Custom graph database configuration for relationship storage.
run_in_background: If True, starts processing asynchronously and returns immediately.
@ -188,11 +191,28 @@ async def cognify(
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
"""
if config is None:
ontology_config = get_ontology_env_config()
if (
ontology_config.ontology_file_path
and ontology_config.ontology_resolver
and ontology_config.matching_strategy
):
config: Config = {
"ontology_config": {
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
}
}
else:
config: Config = {
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
}
if temporal_cognify:
tasks = await get_temporal_tasks(user, chunker, chunk_size)
else:
tasks = await get_default_tasks(
user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
user, graph_model, chunker, chunk_size, config, custom_prompt
)
# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@ -216,9 +236,26 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
graph_model: BaseModel = KnowledgeGraph,
chunker=TextChunker,
chunk_size: int = None,
ontology_file_path: Optional[str] = None,
config: Config = None,
custom_prompt: Optional[str] = None,
) -> list[Task]:
if config is None:
ontology_config = get_ontology_env_config()
if (
ontology_config.ontology_file_path
and ontology_config.ontology_resolver
and ontology_config.matching_strategy
):
config: Config = {
"ontology_config": {
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
}
}
else:
config: Config = {
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
}
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@ -230,7 +267,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
Task(
extract_graph_from_data,
graph_model=graph_model,
ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
config=config,
custom_prompt=custom_prompt,
task_config={"batch_size": 10},
), # Generate knowledge graphs from the document chunks.

View file

@ -82,6 +82,9 @@ async def search(
Best for: General-purpose queries or when you're unsure which search type is best.
Returns: The results from the automatically selected search type.
**CHUNKS_LEXICAL**:
Token-based lexical chunk search (e.g., Jaccard). Best for: exact-term matching, stopword-aware lookups.
Returns: Ranked text chunks (optionally with scores).
Args:
query_text: Your question or search query in natural language.

View file

@ -11,7 +11,7 @@ class BaseConfig(BaseSettings):
data_root_directory: str = get_absolute_path(".data_storage")
system_root_directory: str = get_absolute_path(".cognee_system")
cache_root_directory: str = get_absolute_path(".cognee_cache")
monitoring_tool: object = Observer.LANGFUSE
monitoring_tool: object = Observer.NONE
@pydantic.model_validator(mode="after")
def validate_paths(self):
@ -30,7 +30,10 @@ class BaseConfig(BaseSettings):
# Require absolute paths for root directories
self.data_root_directory = ensure_absolute_path(self.data_root_directory)
self.system_root_directory = ensure_absolute_path(self.system_root_directory)
self.cache_root_directory = ensure_absolute_path(self.cache_root_directory)
# Set monitoring tool based on available keys
if self.langfuse_public_key and self.langfuse_secret_key:
self.monitoring_tool = Observer.LANGFUSE
return self
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")

View file

@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
from cognee.tasks.graph import extract_graph_from_data
from cognee.tasks.storage import add_data_points
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
async def get_default_tasks_by_indices(
@ -33,7 +33,7 @@ async def get_no_summary_tasks(
# Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
graph_task = Task(
extract_graph_from_data,

View file

@ -6,6 +6,15 @@ from .create_dataset import create_dataset
async def create_authorized_dataset(dataset_name: str, user: User) -> Dataset:
"""
Create a new dataset and give all permissions on this dataset to the given user.
Args:
dataset_name: Name of the dataset.
user: The user object.
Returns:
Dataset: The new authorized dataset.
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -15,7 +15,7 @@ async def get_authorized_dataset(
Get a specific dataset with permissions for a user.
Args:
user_id (UUID): user id
user: User object
dataset_id (UUID): dataset id
permission_type (str): permission type(read, write, delete, share), default is read

View file

@ -11,6 +11,17 @@ from ..models import Dataset
async def get_authorized_dataset_by_name(
dataset_name: str, user: User, permission_type: str
) -> Optional[Dataset]:
"""
Get a specific dataset with the given name, with permissions for a given user.
Args:
dataset_name: Name of the dataset.
user: User object.
permission_type (str): permission type(read, write, delete, share), default is read
Returns:
Optional[Dataset]: dataset with permissions
"""
authorized_datasets = await get_authorized_existing_datasets([], permission_type, user)
return next((dataset for dataset in authorized_datasets if dataset.name == dataset_name), None)

View file

@ -7,8 +7,14 @@ from cognee.modules.engine.utils import (
generate_node_id,
generate_node_name,
)
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.get_default_ontology_resolver import (
get_default_ontology_resolver,
get_ontology_resolver_from_env,
)
def _create_node_key(node_id: str, category: str) -> str:
@ -83,7 +89,7 @@ def _process_ontology_edges(
def _create_type_node(
node_type: str,
ontology_resolver: OntologyResolver,
ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@ -141,7 +147,7 @@ def _create_entity_node(
node_name: str,
node_description: str,
type_node: EntityType,
ontology_resolver: OntologyResolver,
ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@ -198,7 +204,7 @@ def _create_entity_node(
def _process_graph_nodes(
data_chunk: DocumentChunk,
graph: KnowledgeGraph,
ontology_resolver: OntologyResolver,
ontology_resolver: RDFLibOntologyResolver,
added_nodes_map: dict,
added_ontology_nodes_map: dict,
name_mapping: dict,
@ -277,7 +283,7 @@ def _process_graph_edges(
def expand_with_nodes_and_edges(
data_chunks: list[DocumentChunk],
chunk_graphs: list[KnowledgeGraph],
ontology_resolver: OntologyResolver = None,
ontology_resolver: BaseOntologyResolver = None,
existing_edges_map: Optional[dict[str, bool]] = None,
):
"""
@ -296,8 +302,8 @@ def expand_with_nodes_and_edges(
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
from the chunk content.
ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
types against an ontology. If None, a default OntologyResolver is created.
ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and
types against an ontology. If None, a default RDFLibOntologyResolver is created.
Defaults to None.
existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
@ -320,7 +326,15 @@ def expand_with_nodes_and_edges(
existing_edges_map = {}
if ontology_resolver is None:
ontology_resolver = OntologyResolver()
ontology_config = get_ontology_env_config()
if (
ontology_config.ontology_file_path
and ontology_config.ontology_resolver
and ontology_config.matching_strategy
):
ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict())
else:
ontology_resolver = get_default_ontology_resolver()
added_nodes_map = {}
added_ontology_nodes_map = {}

View file

@ -23,8 +23,6 @@ async def retrieve_existing_edges(
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
data chunk. Each graph contains nodes (entities) and edges (relationships) that
were extracted from the chunk content.
graph_engine (GraphDBInterface): Interface to the graph database that will be queried
to check for existing edges. Must implement the has_edges() method.
Returns:
dict[str, bool]: A mapping of edge keys to boolean values indicating existence.

View file

@ -9,3 +9,17 @@ def get_observe():
from langfuse.decorators import observe
return observe
elif monitoring == Observer.NONE:
# Return a no-op decorator that handles keyword arguments
def no_op_decorator(*args, **kwargs):
if len(args) == 1 and callable(args[0]) and not kwargs:
# Direct decoration: @observe
return args[0]
else:
# Parameterized decoration: @observe(as_type="generation")
def decorator(func):
return func
return decorator
return no_op_decorator

View file

@ -4,6 +4,7 @@ from enum import Enum
class Observer(str, Enum):
"""Monitoring tools"""
NONE = "none"
LANGFUSE = "langfuse"
LLMLITE = "llmlite"
LANGSMITH = "langsmith"

View file

@ -0,0 +1,42 @@
from abc import ABC, abstractmethod
from typing import List, Tuple, Optional
from cognee.modules.ontology.models import AttachedOntologyNode
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
class BaseOntologyResolver(ABC):
    """Abstract base class for ontology resolvers."""

    def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
        """Initialize the ontology resolver with a matching strategy.

        Args:
            matching_strategy: Strategy used to match entity names.
                Falls back to FuzzyMatchingStrategy when None.
        """
        # Default lazily so callers may simply pass None.
        if matching_strategy is None:
            matching_strategy = FuzzyMatchingStrategy()
        self.matching_strategy = matching_strategy

    @abstractmethod
    def build_lookup(self) -> None:
        """Build the lookup dictionary for ontology entities."""
        ...

    @abstractmethod
    def refresh_lookup(self) -> None:
        """Rebuild/refresh the lookup dictionary."""
        ...

    @abstractmethod
    def find_closest_match(self, name: str, category: str) -> Optional[str]:
        """Find the closest match for a given name within the given category."""
        ...

    @abstractmethod
    def get_subgraph(
        self, node_name: str, node_type: str = "individuals", directed: bool = True
    ) -> Tuple[
        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
    ]:
        """Return (nodes, edges, root node) for the subgraph around the given node."""
        ...

View file

@ -0,0 +1,41 @@
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
def get_default_ontology_resolver() -> BaseOntologyResolver:
    """Return the default resolver: RDFLib-backed, fuzzy matching, no ontology file."""
    return RDFLibOntologyResolver(
        ontology_file=None,
        matching_strategy=FuzzyMatchingStrategy(),
    )
def get_ontology_resolver_from_env(
    ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
) -> BaseOntologyResolver:
    """
    Create and return an ontology resolver instance based on environment parameters.

    Currently, this function supports only the RDFLib-based ontology resolver
    with a fuzzy matching strategy.

    Args:
        ontology_resolver (str): The ontology resolver type to use.
            Supported value: "rdflib".
        matching_strategy (str): The matching strategy to apply.
            Supported value: "fuzzy".
        ontology_file_path (str): Path to the ontology file required for the resolver.

    Returns:
        BaseOntologyResolver: An instance of the requested ontology resolver.

    Raises:
        EnvironmentError: If the provided resolver or strategy is unsupported,
            or if the ontology file path is missing.
    """
    if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
        return RDFLibOntologyResolver(
            matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
        )

    # Bug fix: the previous message always blamed the resolver, even when the
    # matching strategy was unsupported or the file path was missing. Report
    # each failing parameter so misconfiguration is easy to diagnose.
    problems = []
    if ontology_resolver != "rdflib":
        problems.append(f"unsupported ontology resolver: {ontology_resolver!r} (supported: 'rdflib')")
    if matching_strategy != "fuzzy":
        problems.append(f"unsupported matching strategy: {matching_strategy!r} (supported: 'fuzzy')")
    if not ontology_file_path:
        problems.append("missing ontology_file_path")
    raise EnvironmentError(
        "Cannot build ontology resolver from environment: " + "; ".join(problems)
    )

View file

@ -0,0 +1,53 @@
import difflib
from abc import ABC, abstractmethod
from typing import List, Optional
class MatchingStrategy(ABC):
    """Abstract base class for ontology entity matching strategies."""

    @abstractmethod
    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Find the best match for ``name`` among ``candidates``.

        Args:
            name: The name to match.
            candidates: Candidate names to match against.

        Returns:
            The best matching candidate name, or None when nothing matches.
        """
        ...


class FuzzyMatchingStrategy(MatchingStrategy):
    """Approximate string matching backed by difflib."""

    def __init__(self, cutoff: float = 0.8):
        """Initialize the fuzzy matching strategy.

        Args:
            cutoff: Minimum similarity ratio (0.0 to 1.0) a candidate must
                reach to count as a match.
        """
        self.cutoff = cutoff

    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Return the closest fuzzy match for ``name``, or None.

        An exact hit short-circuits the fuzzy search; otherwise the single
        best difflib candidate at or above ``self.cutoff`` is returned.
        """
        if not candidates:
            return None
        # Exact matches win outright, no similarity scoring needed.
        if name in candidates:
            return name
        matches = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
        if matches:
            return matches[0]
        return None

View file

@ -0,0 +1,20 @@
from typing import Any
class AttachedOntologyNode:
    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""

    def __init__(self, uri: Any, category: str):
        # Keep the raw URI but derive a short, human-readable name from it.
        self.uri = uri
        self.name = self._extract_name(uri)
        self.category = category

    @staticmethod
    def _extract_name(uri: Any) -> str:
        """Return the URI fragment after '#', or else its last path segment."""
        text = str(uri)
        if "#" in text:
            return text.rsplit("#", 1)[-1]
        return text.rstrip("/").rsplit("/", 1)[-1]

    def __repr__(self):
        return f"AttachedOntologyNode(name={self.name}, category={self.category})"

View file

@ -0,0 +1,24 @@
from typing import TypedDict, Optional
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.matching_strategies import MatchingStrategy
class OntologyConfig(TypedDict, total=False):
    """Configuration containing ontology resolver.

    total=False: the key may be omitted entirely.

    Attributes:
        ontology_resolver: The ontology resolver instance to use
    """

    ontology_resolver: Optional[BaseOntologyResolver]


class Config(TypedDict, total=False):
    """Top-level configuration dictionary.

    total=False: the key may be omitted entirely.

    Attributes:
        ontology_config: Configuration containing ontology resolver
    """

    ontology_config: Optional[OntologyConfig]

View file

@ -0,0 +1,45 @@
"""This module contains the configuration for ontology handling."""
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
class OntologyEnvConfig(BaseSettings):
    """
    Represents the configuration for ontology handling, including parameters for
    ontology file storage and resolution/matching strategies.

    Values are read via pydantic-settings from the environment or a local
    ``.env`` file.

    Public methods:
    - to_dict

    Instance variables:
    - ontology_resolver
    - matching_strategy
    - ontology_file_path
    - model_config
    """

    # Resolver backend identifier; "rdflib" is the default.
    ontology_resolver: str = "rdflib"
    # Entity-matching strategy identifier; "fuzzy" is the default.
    matching_strategy: str = "fuzzy"
    # Path to the ontology file; empty string means "not configured".
    ontology_file_path: str = ""

    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)

    def to_dict(self) -> dict:
        """
        Return the configuration as a dictionary.

        Returns:
            dict: Keys ``ontology_resolver``, ``matching_strategy`` and
            ``ontology_file_path`` mapped to their current values.
        """
        return {
            "ontology_resolver": self.ontology_resolver,
            "matching_strategy": self.matching_strategy,
            "ontology_file_path": self.ontology_file_path,
        }
@lru_cache
def get_ontology_env_config():
    """
    Retrieve the ontology configuration.

    Uses lru_cache so the environment is read once and the same
    OntologyEnvConfig instance is returned on every call.

    Returns:
        OntologyEnvConfig: The cached configuration instance.
    """
    return OntologyEnvConfig()

View file

@ -10,31 +10,26 @@ from cognee.modules.ontology.exceptions import (
FindClosestMatchError,
GetSubgraphError,
)
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
logger = get_logger("OntologyAdapter")
class AttachedOntologyNode:
"""Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
class RDFLibOntologyResolver(BaseOntologyResolver):
"""RDFLib-based ontology resolver implementation.
def __init__(self, uri: URIRef, category: str):
self.uri = uri
self.name = self._extract_name(uri)
self.category = category
This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
"""
@staticmethod
def _extract_name(uri: URIRef) -> str:
uri_str = str(uri)
if "#" in uri_str:
return uri_str.split("#")[-1]
return uri_str.rstrip("/").split("/")[-1]
def __repr__(self):
return f"AttachedOntologyNode(name={self.name}, category={self.category})"
class OntologyResolver:
def __init__(self, ontology_file: Optional[str] = None):
def __init__(
self,
ontology_file: Optional[str] = None,
matching_strategy: Optional[MatchingStrategy] = None,
) -> None:
super().__init__(matching_strategy)
self.ontology_file = ontology_file
try:
if ontology_file and os.path.exists(ontology_file):
@ -60,7 +55,7 @@ class OntologyResolver:
name = uri_str.rstrip("/").split("/")[-1]
return name.lower().replace(" ", "_").strip()
def build_lookup(self):
def build_lookup(self) -> None:
try:
classes: Dict[str, URIRef] = {}
individuals: Dict[str, URIRef] = {}
@ -97,7 +92,7 @@ class OntologyResolver:
logger.error("Failed to build lookup dictionary: %s", str(e))
raise RuntimeError("Lookup build failed") from e
def refresh_lookup(self):
def refresh_lookup(self) -> None:
self.build_lookup()
logger.info("Ontology lookup refreshed.")
@ -105,13 +100,8 @@ class OntologyResolver:
try:
normalized_name = name.lower().replace(" ", "_").strip()
possible_matches = list(self.lookup.get(category, {}).keys())
if normalized_name in possible_matches:
return normalized_name
best_match = difflib.get_close_matches(
normalized_name, possible_matches, n=1, cutoff=0.8
)
return best_match[0] if best_match else None
return self.matching_strategy.find_match(normalized_name, possible_matches)
except Exception as e:
logger.error("Error in find_closest_match: %s", str(e))
raise FindClosestMatchError() from e
@ -125,7 +115,9 @@ class OntologyResolver:
def get_subgraph(
self, node_name: str, node_type: str = "individuals", directed: bool = True
) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]:
) -> Tuple[
List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
]:
nodes_set = set()
edges: List[Tuple[str, str, str]] = []
visited = set()

View file

@ -11,6 +11,19 @@ from cognee.modules.data.methods import (
async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
"""
Function handles creation and dataset authorization if dataset already exist for Cognee.
Verifies that provided user has necessary permission for provided Dataset.
If Dataset does not exist creates the Dataset and gives permission for the user creating the dataset.
Args:
dataset_id: Id of the dataset.
dataset_name: Name of the dataset.
user: Cognee User request is being processed for, if None default user will be used.
Returns:
Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
"""
if not user:
user = await get_default_user()

View file

@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets(
datasets: Dataset names or Dataset UUID (in case Datasets already exist)
Returns:
Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets.
"""
# If no user is provided use default user
if user is None:

View file

@ -0,0 +1,56 @@
from cognee.modules.retrieval.lexical_retriever import LexicalRetriever
import re
from collections import Counter
from typing import Optional
class JaccardChunksRetriever(LexicalRetriever):
    """
    Retriever that specializes LexicalRetriever to use Jaccard similarity.
    """

    def __init__(
        self,
        top_k: int = 10,
        with_scores: bool = False,
        stop_words: Optional[list[str]] = None,
        multiset_jaccard: bool = False,
    ):
        """
        Parameters
        ----------
        top_k : int
            Number of top results to return.
        with_scores : bool
            If True, return (payload, score) pairs. Otherwise, only payloads.
        stop_words : list[str], optional
            List of tokens to filter out.
        multiset_jaccard : bool
            If True, use multiset Jaccard (frequency aware).
        """
        # Normalize stop words once, lowercased, for O(1) membership tests.
        self.stop_words = {word.lower() for word in stop_words} if stop_words else set()
        self.multiset_jaccard = multiset_jaccard
        super().__init__(
            tokenizer=self._tokenizer,
            scorer=self._scorer,
            top_k=top_k,
            with_scores=with_scores,
        )

    def _tokenizer(self, text: str) -> list[str]:
        r"""
        Tokenizer: lowercases, splits on word characters (\w+), filters stopwords.
        """
        return [
            token
            for token in re.findall(r"\w+", text.lower())
            if token not in self.stop_words
        ]

    def _scorer(self, query_tokens: list[str], chunk_tokens: list[str]) -> float:
        """
        Jaccard similarity scorer.

        - If multiset_jaccard=True, uses frequency-aware Jaccard.
        - Otherwise, normal set Jaccard.
        """
        if self.multiset_jaccard:
            q_counts = Counter(query_tokens)
            c_counts = Counter(chunk_tokens)
            vocabulary = set(q_counts) | set(c_counts)
            overlap = sum(min(q_counts[t], c_counts[t]) for t in vocabulary)
            union = sum(max(q_counts[t], c_counts[t]) for t in vocabulary)
            return overlap / union if union else 0.0
        q_set = set(query_tokens)
        c_set = set(chunk_tokens)
        if not q_set or not c_set:
            return 0.0
        return len(q_set & c_set) / len(q_set | c_set)

View file

@ -0,0 +1,117 @@
import asyncio
from typing import Any, Callable, Optional
from heapq import nlargest
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.retrieval.base_retriever import BaseRetriever
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
from cognee.shared.logging_utils import get_logger
logger = get_logger("LexicalRetriever")
class LexicalRetriever(BaseRetriever):
    """Lexical retriever that ranks DocumentChunk nodes by token overlap.

    All nodes of type "DocumentChunk" are loaded once from the graph engine
    and tokenized with the injected ``tokenizer``; at query time each chunk
    is scored against the query with the injected ``scorer`` and the top-k
    chunk payloads (optionally paired with scores) are returned.
    """

    def __init__(self, tokenizer: Callable, scorer: Callable, top_k: int = 10, with_scores: bool = False):
        """Create a retriever.

        Args:
            tokenizer: Callable mapping a text string to a list of tokens.
            scorer: Callable mapping (query_tokens, chunk_tokens) to a numeric score.
            top_k: Number of best-scoring chunks to return (must be > 0).
            with_scores: When True, results are (payload, score) tuples.

        Raises:
            TypeError: If ``tokenizer`` or ``scorer`` is not callable.
            ValueError: If ``top_k`` is not a positive integer.
        """
        if not callable(tokenizer) or not callable(scorer):
            raise TypeError("tokenizer and scorer must be callables")
        if not isinstance(top_k, int) or top_k <= 0:
            raise ValueError("top_k must be a positive integer")
        self.tokenizer = tokenizer
        self.scorer = scorer
        self.top_k = top_k
        self.with_scores = bool(with_scores)
        # Cache keyed by dataset context
        self.chunks: dict[str, Any] = {}  # {chunk_id: tokens}
        self.payloads: dict[str, Any] = {}  # {chunk_id: original_document}
        self._initialized = False
        # Guards one-time initialization against concurrent callers.
        self._init_lock = asyncio.Lock()

    async def initialize(self) -> None:
        """Initialize retriever by reading all DocumentChunks from graph_engine.

        Populates ``self.chunks`` (token lists) and ``self.payloads``
        (original node documents), both keyed by the chunk's string id.

        Raises:
            NoDataError: If the graph engine cannot be reached, or no valid
                chunks were loaded.
        """
        async with self._init_lock:
            # Another coroutine may have completed initialization while we
            # were waiting on the lock.
            if self._initialized:
                return

            logger.info("Initializing LexicalRetriever by loading DocumentChunks from graph engine")

            try:
                graph_engine = await get_graph_engine()
                nodes, _ = await graph_engine.get_filtered_graph_data([{"type": ["DocumentChunk"]}])
            except Exception as e:
                logger.error("Graph engine initialization failed")
                raise NoDataError("Graph engine initialization failed") from e

            chunk_count = 0
            for node in nodes:
                # Each node is expected to unpack as an (id, properties) pair;
                # anything else is skipped with a warning.
                try:
                    chunk_id, document = node
                except Exception:
                    logger.warning("Skipping node with unexpected shape: %r", node)
                    continue

                if document.get("type") == "DocumentChunk" and document.get("text"):
                    try:
                        tokens = self.tokenizer(document["text"])
                        # Chunks that tokenize to nothing cannot be scored.
                        if not tokens:
                            continue
                        # Prefer the document's own id; fall back to the node id.
                        self.chunks[str(document.get("id",chunk_id))] = tokens
                        self.payloads[str(document.get("id",chunk_id))] = document
                        chunk_count += 1
                    except Exception as e:
                        # A failing tokenizer skips only this chunk.
                        logger.error("Tokenizer failed for chunk %s: %s", chunk_id, str(e))

            if chunk_count == 0:
                logger.error("Initialization completed but no valid chunks were loaded.")
                raise NoDataError("No valid chunks loaded during initialization.")

            self._initialized = True
            logger.info("Initialized with %d document chunks", len(self.chunks))

    async def get_context(self, query: str) -> Any:
        """Retrieves relevant chunks for the given query.

        Lazily initializes the chunk cache on first use.

        Args:
            query: Free-text query to score against the cached chunks.

        Returns:
            The top-k chunk payloads — (payload, score) tuples when
            ``self.with_scores`` is True, payloads only otherwise. Returns
            an empty list when no chunks are cached or the query yields no
            tokens.
        """
        if not self._initialized:
            await self.initialize()

        if not self.chunks:
            logger.warning("No chunks available in retriever")
            return []

        try:
            query_tokens = self.tokenizer(query)
        except Exception as e:
            logger.error("Failed to tokenize query: %s", str(e))
            return []

        if not query_tokens:
            logger.warning("Query produced no tokens")
            return []

        results = []
        for chunk_id, chunk_tokens in self.chunks.items():
            # A failing or non-numeric scorer demotes this chunk to 0.0
            # instead of aborting the whole query.
            try:
                score = self.scorer(query_tokens, chunk_tokens)
                if not isinstance(score, (int, float)):
                    logger.warning("Non-numeric score for chunk %s → treated as 0.0", chunk_id)
                    score = 0.0
            except Exception as e:
                logger.error("Scorer failed for chunk %s: %s", chunk_id, str(e))
                score = 0.0
            results.append((chunk_id, score))

        # Keep only the k best-scoring chunks.
        top_results = nlargest(self.top_k, results, key=lambda x: x[1])

        logger.info("Retrieved %d/%d chunks for query (len=%d)", len(top_results), len(results), len(query_tokens))

        if self.with_scores:
            return [(self.payloads[chunk_id], score) for chunk_id, score in top_results]
        else:
            return [self.payloads[chunk_id] for chunk_id, _ in top_results]

    async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
        """Returns context for the given query (retrieves if not provided)."""
        if context is None:
            context = await self.get_context(query)
        return context

View file

@ -15,6 +15,7 @@ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever
from cognee.modules.retrieval.jaccard_retrival import JaccardChunksRetriever
from cognee.modules.retrieval.graph_summary_completion_retriever import (
GraphSummaryCompletionRetriever,
)
@ -152,6 +153,10 @@ async def get_search_type_tools(
TemporalRetriever(top_k=top_k).get_completion,
TemporalRetriever(top_k=top_k).get_context,
],
SearchType.CHUNKS_LEXICAL: (lambda _r=JaccardChunksRetriever(top_k=top_k): [
_r.get_completion,
_r.get_context,
])(),
SearchType.CODING_RULES: [
CodingRulesRetriever(rules_nodeset_name=node_name).get_existing_rules,
],

View file

@ -17,3 +17,4 @@ class SearchType(Enum):
FEEDBACK = "FEEDBACK"
TEMPORAL = "TEMPORAL"
CODING_RULES = "CODING_RULES"
CHUNKS_LEXICAL = "CHUNKS_LEXICAL"

View file

@ -9,6 +9,18 @@ from uuid import UUID
async def authorized_give_permission_on_datasets(
principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
):
"""
Give a user permission on certain datasets.
The request owner must have the necessary permission to share the datasets.
Args:
principal_id: Id of user to whom datasets are shared
dataset_ids: Ids of datasets to share
permission_name: Name of permission to give
owner_id: Id of the request owner
Returns:
None
"""
# If only a single dataset UUID is provided transform it to a list
if not isinstance(dataset_ids, list):
dataset_ids = [dataset_ids]

View file

@ -10,6 +10,17 @@ logger = get_logger()
async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
"""
Check if a user has a specific permission on a dataset.
Args:
user: User whose permission is checked
permission_type: Type of permission to check
dataset_id: Id of the dataset
Returns:
None
"""
if user is None:
user = await get_default_user()

View file

@ -11,6 +11,16 @@ logger = get_logger()
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
"""
Return a list of datasets the user has permission for.
If the user is part of a tenant, return datasets his roles have permission for.
Args:
user: User whose accessible datasets are collected
permission_type: Type of permission to filter by
Returns:
list[Dataset]: List of datasets user has permission for
"""
datasets = list()
# Get all datasets User has explicit access to
datasets.extend(await get_principal_datasets(user, permission_type))

View file

@ -8,6 +8,16 @@ from ...models import ACL, Permission
async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -> list[str]:
"""
Return a list of documents ids for which the user has read permission.
If datasets are specified, return only documents from those datasets.
Args:
user_id: Id of the user
datasets: List of datasets
Returns:
list[str]: List of documents for which the user has read permission
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -6,6 +6,15 @@ from ...models.Principal import Principal
async def get_principal(principal_id: UUID):
"""
Return information about a user based on their id
Args:
principal_id: Id of the user
Returns:
principal: Information about the user (principal)
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -9,6 +9,17 @@ from ...models.ACL import ACL
async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
"""
Return a list of datasets for which the user (principal) has a certain permission.
Args:
principal: Information about the user
permission_type: Type of permission
Returns:
list[Dataset]: List of datasets for which the user (principal)
has the permission (permission_type).
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -9,6 +9,16 @@ from ...models.Role import Role
async def get_role(tenant_id: UUID, role_name: str):
"""
Return the role with the name role_name of the given tenant.
Args:
tenant_id: Id of the given tenant
role_name: Name of the role
Returns:
The role for the given tenant.
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -15,9 +15,9 @@ async def get_specific_user_permission_datasets(
Return a list of datasets user has given permission for. If a list of datasets is provided,
verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
Args:
user_id:
permission_type:
dataset_ids:
user_id: Id of the user.
permission_type: Type of the permission.
dataset_ids: Ids of the provided datasets
Returns:
list[Dataset]: List of datasets user has permission for

View file

@ -8,6 +8,15 @@ from ...models.Tenant import Tenant
async def get_tenant(tenant_id: UUID):
"""
Return information about the tenant based on the given id.
Args:
tenant_id: Id of the given tenant
Returns:
Information about the given tenant.
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -16,6 +16,15 @@ from cognee.modules.users.models import (
async def give_default_permission_to_role(role_id: UUID, permission_name: str):
"""
Give the permission with given name to the role with the given id as a default permission.
Args:
role_id: Id of the role
permission_name: Name of the permission
Returns:
None
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -16,6 +16,15 @@ from cognee.modules.users.models import (
async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
"""
Give the permission with given name to the tenant with the given id as a default permission.
Args:
tenant_id: Id of the tenant
permission_name: Name of the permission
Returns:
None
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
tenant = (

View file

@ -16,6 +16,15 @@ from cognee.modules.users.models import (
async def give_default_permission_to_user(user_id: UUID, permission_name: str):
"""
Give the permission with given name to the user with the given id as a default permission.
Args:
user_id: Id of the user
permission_name: Name of the permission
Returns:
None
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()

View file

@ -24,6 +24,16 @@ async def give_permission_on_dataset(
dataset_id: UUID,
permission_name: str,
):
"""
Give a specific permission on a dataset to a user.
Args:
principal: User who is being given the permission on the dataset
dataset_id: Id of the dataset
permission_name: Name of permission to give
Returns:
None
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:

View file

@ -21,6 +21,17 @@ from cognee.modules.users.models import (
async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
"""
Add a user with the given id to the role with the given id.
Args:
user_id: Id of the user.
role_id: Id of the role.
owner_id: Id of the request owner.
Returns:
None
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()

View file

@ -16,6 +16,16 @@ async def create_role(
role_name: str,
owner_id: UUID,
):
"""
Create a new role with the given name, if the request owner with the given id
has the necessary permission.
Args:
role_name: Name of the new role.
owner_id: Id of the request owner.
Returns:
None
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(owner_id)

View file

@ -13,6 +13,18 @@ from cognee.modules.users.exceptions import (
async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
"""
Add a user with the given id to the tenant with the given id.
This can only be successful if the request owner with the given id is the tenant owner.
Args:
user_id: Id of the user.
tenant_id: Id of the tenant.
owner_id: Id of the request owner.
Returns:
None
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(user_id)

View file

@ -8,6 +8,16 @@ from cognee.modules.users.methods import get_user
async def create_tenant(tenant_name: str, user_id: UUID):
"""
Create a new tenant with the given name, for the user with the given id.
This user is the owner of the tenant.
Args:
tenant_name: Name of the new tenant.
user_id: Id of the user.
Returns:
None
"""
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:

View file

@ -3,8 +3,14 @@ from typing import Type, List, Optional
from pydantic import BaseModel
from cognee.infrastructure.databases.graph import get_graph_engine
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
from cognee.tasks.storage.add_data_points import add_data_points
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.get_default_ontology_resolver import (
get_default_ontology_resolver,
get_ontology_resolver_from_env,
)
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.modules.graph.utils import (
expand_with_nodes_and_edges,
@ -24,9 +30,28 @@ async def integrate_chunk_graphs(
data_chunks: list[DocumentChunk],
chunk_graphs: list,
graph_model: Type[BaseModel],
ontology_adapter: OntologyResolver,
ontology_resolver: BaseOntologyResolver,
) -> List[DocumentChunk]:
"""Updates DocumentChunk objects, integrates data points and edges into databases."""
"""Integrate chunk graphs with ontology validation and store in databases.
This function processes document chunks and their associated knowledge graphs,
validates entities against an ontology resolver, and stores the integrated
data points and edges in the configured databases.
Args:
data_chunks: List of document chunks containing source data
chunk_graphs: List of knowledge graphs corresponding to each chunk
graph_model: Pydantic model class for graph data validation
ontology_resolver: Resolver for validating entities against ontology
Returns:
List of updated DocumentChunk objects with integrated data
Raises:
InvalidChunkGraphInputError: If input validation fails
InvalidGraphModelError: If graph model validation fails
InvalidOntologyAdapterError: If ontology resolver validation fails
"""
if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
@ -36,9 +61,9 @@ async def integrate_chunk_graphs(
)
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
raise InvalidGraphModelError(graph_model)
if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"):
raise InvalidOntologyAdapterError(
type(ontology_adapter).__name__ if ontology_adapter else "None"
type(ontology_resolver).__name__ if ontology_resolver else "None"
)
graph_engine = await get_graph_engine()
@ -55,7 +80,7 @@ async def integrate_chunk_graphs(
)
graph_nodes, graph_edges = expand_with_nodes_and_edges(
data_chunks, chunk_graphs, ontology_adapter, existing_edges_map
data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
)
if len(graph_nodes) > 0:
@ -70,7 +95,7 @@ async def integrate_chunk_graphs(
async def extract_graph_from_data(
data_chunks: List[DocumentChunk],
graph_model: Type[BaseModel],
ontology_adapter: OntologyResolver = None,
config: Config = None,
custom_prompt: Optional[str] = None,
) -> List[DocumentChunk]:
"""
@ -101,6 +126,24 @@ async def extract_graph_from_data(
if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
]
return await integrate_chunk_graphs(
data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
)
# Extract resolver from config if provided, otherwise get default
if config is None:
ontology_config = get_ontology_env_config()
if (
ontology_config.ontology_file_path
and ontology_config.ontology_resolver
and ontology_config.matching_strategy
):
config: Config = {
"ontology_config": {
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
}
}
else:
config: Config = {
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
}
ontology_resolver = config["ontology_config"]["ontology_resolver"]
return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver)

View file

@ -3,7 +3,7 @@ from typing import List
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
from cognee.shared.data_models import KnowledgeGraph
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
extract_content_nodes_and_relationship_names,
@ -17,9 +17,21 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
async def extract_graph_from_data(
data_chunks: List[DocumentChunk],
n_rounds: int = 2,
ontology_adapter: OntologyResolver = None,
ontology_adapter: BaseOntologyResolver = None,
) -> List[DocumentChunk]:
"""Extract and update graph data from document chunks in multiple steps."""
"""Extract and update graph data from document chunks using cascade extraction.
This function performs multi-step graph extraction from document chunks,
using cascade extraction techniques to build comprehensive knowledge graphs.
Args:
data_chunks: List of document chunks to process
n_rounds: Number of extraction rounds to perform (default: 2)
ontology_adapter: Resolver for validating entities against ontology
Returns:
List of updated DocumentChunk objects with extracted graph data
"""
chunk_nodes = await asyncio.gather(
*[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks]
)
@ -44,5 +56,5 @@ async def extract_graph_from_data(
data_chunks=data_chunks,
chunk_graphs=chunk_graphs,
graph_model=KnowledgeGraph,
ontology_adapter=ontology_adapter or OntologyResolver(),
ontology_adapter=ontology_adapter,
)

View file

@ -1,12 +1,14 @@
import pytest
from rdflib import Graph, Namespace, RDF, OWL, RDFS
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.models import AttachedOntologyNode
from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
def test_ontology_adapter_initialization_success():
"""Test successful initialization of OntologyAdapter."""
"""Test successful initialization of RDFLibOntologyResolver from get_default_ontology_resolver."""
adapter = OntologyResolver()
adapter = get_default_ontology_resolver()
adapter.build_lookup()
assert isinstance(adapter.lookup, dict)
@ -14,7 +16,7 @@ def test_ontology_adapter_initialization_success():
def test_ontology_adapter_initialization_file_not_found():
"""Test OntologyAdapter initialization with nonexistent file."""
adapter = OntologyResolver(ontology_file="nonexistent.owl")
adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl")
assert adapter.graph is None
@ -27,7 +29,7 @@ def test_build_lookup():
g.add((ns.Audi, RDF.type, ns.Car))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -50,7 +52,7 @@ def test_find_closest_match_exact():
g.add((ns.Car, RDF.type, OWL.Class))
g.add((ns.Audi, RDF.type, ns.Car))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -71,7 +73,7 @@ def test_find_closest_match_fuzzy():
g.add((ns.Audi, RDF.type, ns.Car))
g.add((ns.BMW, RDF.type, ns.Car))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -92,7 +94,7 @@ def test_find_closest_match_no_match():
g.add((ns.Audi, RDF.type, ns.Car))
g.add((ns.BMW, RDF.type, ns.Car))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -102,10 +104,10 @@ def test_find_closest_match_no_match():
def test_get_subgraph_no_match_rdflib():
"""Test get_subgraph returns empty results for a non-existent node."""
"""Test get_subgraph returns empty results for a non-existent node using RDFLibOntologyResolver."""
g = Graph()
resolver = OntologyResolver()
resolver = get_default_ontology_resolver()
resolver.graph = g
resolver.build_lookup()
@ -138,7 +140,7 @@ def test_get_subgraph_success_rdflib():
g.add((ns.VW, owns, ns.Audi))
g.add((ns.VW, owns, ns.Porsche))
resolver = OntologyResolver()
resolver = RDFLibOntologyResolver()
resolver.graph = g
resolver.build_lookup()
@ -160,10 +162,10 @@ def test_get_subgraph_success_rdflib():
def test_refresh_lookup_rdflib():
"""Test that refresh_lookup rebuilds the lookup dict into a new object."""
"""Test that refresh_lookup rebuilds the lookup dict into a new object using RDFLibOntologyResolver."""
g = Graph()
resolver = OntologyResolver()
resolver = get_default_ontology_resolver()
resolver.graph = g
resolver.build_lookup()
@ -172,3 +174,318 @@ def test_refresh_lookup_rdflib():
resolver.refresh_lookup()
assert resolver.lookup is not original_lookup
def test_fuzzy_matching_strategy_exact_match():
    """FuzzyMatchingStrategy returns the candidate itself on an exact match."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    # A query that is literally present among the candidates must be
    # returned unchanged.
    assert FuzzyMatchingStrategy().find_match("audi", ["audi", "bmw", "mercedes"]) == "audi"
def test_fuzzy_matching_strategy_fuzzy_match():
    """FuzzyMatchingStrategy resolves near-miss spellings above the cutoff."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    matcher = FuzzyMatchingStrategy(cutoff=0.6)
    # "audii" is close enough to "audi" to clear the 0.6 similarity cutoff.
    assert matcher.find_match("audii", ["audi", "bmw", "mercedes"]) == "audi"
def test_fuzzy_matching_strategy_no_match():
    """FuzzyMatchingStrategy yields None when nothing clears the cutoff."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    matcher = FuzzyMatchingStrategy(cutoff=0.9)
    # Nothing in the candidate list resembles the query at >= 0.9 similarity.
    assert matcher.find_match("completely_different", ["audi", "bmw", "mercedes"]) is None
def test_fuzzy_matching_strategy_empty_candidates():
    """FuzzyMatchingStrategy tolerates an empty candidate list."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    # With no candidates there is nothing to match against.
    assert FuzzyMatchingStrategy().find_match("audi", []) is None
def test_base_ontology_resolver_initialization():
"""Test BaseOntologyResolver initialization with default matching strategy."""
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
class TestOntologyResolver(BaseOntologyResolver):
def build_lookup(self):
pass
def refresh_lookup(self):
pass
def find_closest_match(self, name, category):
return None
def get_subgraph(self, node_name, node_type="individuals", directed=True):
return [], [], None
resolver = TestOntologyResolver()
assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
def test_base_ontology_resolver_custom_matching_strategy():
"""Test BaseOntologyResolver initialization with custom matching strategy."""
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.matching_strategies import MatchingStrategy
class CustomMatchingStrategy(MatchingStrategy):
def find_match(self, name, candidates):
return "custom_match"
class TestOntologyResolver(BaseOntologyResolver):
def build_lookup(self):
pass
def refresh_lookup(self):
pass
def find_closest_match(self, name, category):
return None
def get_subgraph(self, node_name, node_type="individuals", directed=True):
return [], [], None
custom_strategy = CustomMatchingStrategy()
resolver = TestOntologyResolver(matching_strategy=custom_strategy)
assert resolver.matching_strategy == custom_strategy
def test_ontology_config_structure():
"""Test TypedDict structure for ontology configuration."""
from cognee.modules.ontology.ontology_config import Config
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
matching_strategy = FuzzyMatchingStrategy()
resolver = RDFLibOntologyResolver(matching_strategy=matching_strategy)
config: Config = {"ontology_config": {"ontology_resolver": resolver}}
assert config["ontology_config"]["ontology_resolver"] == resolver
def test_get_ontology_resolver_default():
"""Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy."""
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
resolver = get_default_ontology_resolver()
assert isinstance(resolver, RDFLibOntologyResolver)
assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
def test_get_default_ontology_resolver():
"""Test get_default_ontology_resolver returns a properly configured RDFLibOntologyResolver with FuzzyMatchingStrategy."""
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
resolver = get_default_ontology_resolver()
assert isinstance(resolver, RDFLibOntologyResolver)
assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
def test_rdflib_ontology_resolver_uses_matching_strategy():
"""Test that RDFLibOntologyResolver uses the provided matching strategy."""
from cognee.modules.ontology.matching_strategies import MatchingStrategy
class TestMatchingStrategy(MatchingStrategy):
def find_match(self, name, candidates):
return "test_match" if candidates else None
ns = Namespace("http://example.org/test#")
g = Graph()
g.add((ns.Car, RDF.type, OWL.Class))
g.add((ns.Audi, RDF.type, ns.Car))
resolver = RDFLibOntologyResolver(matching_strategy=TestMatchingStrategy())
resolver.graph = g
resolver.build_lookup()
result = resolver.find_closest_match("Audi", "individuals")
assert result == "test_match"
def test_rdflib_ontology_resolver_default_matching_strategy():
"""Test that RDFLibOntologyResolver uses FuzzyMatchingStrategy by default."""
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
resolver = RDFLibOntologyResolver()
assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
def test_get_ontology_resolver_from_env_success():
"""Test get_ontology_resolver_from_env returns correct resolver with valid parameters."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
resolver = get_ontology_resolver_from_env(
ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
)
assert isinstance(resolver, RDFLibOntologyResolver)
assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
assert resolver.ontology_file == "/test/path.owl"
def test_get_ontology_resolver_from_env_unsupported_resolver():
"""Test get_ontology_resolver_from_env raises EnvironmentError for unsupported resolver."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
with pytest.raises(EnvironmentError) as exc_info:
get_ontology_resolver_from_env(
ontology_resolver="unsupported",
matching_strategy="fuzzy",
ontology_file_path="/test/path.owl",
)
assert "Unsupported ontology resolver: unsupported" in str(exc_info.value)
assert "Supported resolvers are: RdfLib with FuzzyMatchingStrategy" in str(exc_info.value)
def test_get_ontology_resolver_from_env_unsupported_strategy():
"""Test get_ontology_resolver_from_env raises EnvironmentError for unsupported strategy."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
with pytest.raises(EnvironmentError) as exc_info:
get_ontology_resolver_from_env(
ontology_resolver="rdflib",
matching_strategy="unsupported",
ontology_file_path="/test/path.owl",
)
assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
def test_get_ontology_resolver_from_env_empty_file_path():
"""Test get_ontology_resolver_from_env raises EnvironmentError for empty file path."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
with pytest.raises(EnvironmentError) as exc_info:
get_ontology_resolver_from_env(
ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path=""
)
assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
def test_get_ontology_resolver_from_env_none_file_path():
"""Test get_ontology_resolver_from_env raises EnvironmentError for None file path."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
with pytest.raises(EnvironmentError) as exc_info:
get_ontology_resolver_from_env(
ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path=None
)
assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
def test_get_ontology_resolver_from_env_empty_resolver():
"""Test get_ontology_resolver_from_env raises EnvironmentError for empty resolver."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
with pytest.raises(EnvironmentError) as exc_info:
get_ontology_resolver_from_env(
ontology_resolver="", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
)
assert "Unsupported ontology resolver:" in str(exc_info.value)
def test_get_ontology_resolver_from_env_empty_strategy():
"""Test get_ontology_resolver_from_env raises EnvironmentError for empty strategy."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
with pytest.raises(EnvironmentError) as exc_info:
get_ontology_resolver_from_env(
ontology_resolver="rdflib", matching_strategy="", ontology_file_path="/test/path.owl"
)
assert "Unsupported ontology resolver: rdflib" in str(exc_info.value)
def test_get_ontology_resolver_from_env_default_parameters():
"""Test get_ontology_resolver_from_env with default empty parameters raises EnvironmentError."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
with pytest.raises(EnvironmentError) as exc_info:
get_ontology_resolver_from_env()
assert "Unsupported ontology resolver:" in str(exc_info.value)
def test_get_ontology_resolver_from_env_case_sensitivity():
"""Test get_ontology_resolver_from_env is case sensitive."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
with pytest.raises(EnvironmentError):
get_ontology_resolver_from_env(
ontology_resolver="RDFLIB",
matching_strategy="fuzzy",
ontology_file_path="/test/path.owl",
)
with pytest.raises(EnvironmentError):
get_ontology_resolver_from_env(
ontology_resolver="RdfLib",
matching_strategy="fuzzy",
ontology_file_path="/test/path.owl",
)
def test_get_ontology_resolver_from_env_with_actual_file():
"""Test get_ontology_resolver_from_env works with actual file path."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
resolver = get_ontology_resolver_from_env(
ontology_resolver="rdflib",
matching_strategy="fuzzy",
ontology_file_path="/path/to/ontology.owl",
)
assert isinstance(resolver, RDFLibOntologyResolver)
assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
assert resolver.ontology_file == "/path/to/ontology.owl"
def test_get_ontology_resolver_from_env_resolver_functionality():
"""Test that resolver created from env function works correctly."""
from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
resolver = get_ontology_resolver_from_env(
ontology_resolver="rdflib", matching_strategy="fuzzy", ontology_file_path="/test/path.owl"
)
resolver.build_lookup()
assert isinstance(resolver.lookup, dict)
result = resolver.find_closest_match("test", "individuals")
assert result is None # Should return None for non-existent entity
nodes, relationships, start_node = resolver.get_subgraph("test", "individuals")
assert nodes == []
assert relationships == []
assert start_node is None

View file

@ -5,6 +5,8 @@ import cognee
from cognee.api.v1.search import SearchType
from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.shared.logging_utils import setup_logging
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.ontology_config import Config
text_1 = """
1. Audi
@ -60,7 +62,14 @@ async def main():
os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl"
)
await cognee.cognify(ontology_file_path=ontology_path)
# Create full config structure manually
config: Config = {
"ontology_config": {
"ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
}
}
await cognee.cognify(config=config)
print("Knowledge with ontology created.")
# Step 4: Query insights

View file

@ -5,6 +5,8 @@ import os
import textwrap
from cognee.api.v1.search import SearchType
from cognee.api.v1.visualize.visualize import visualize_graph
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
from cognee.modules.ontology.ontology_config import Config
async def run_pipeline(ontology_path=None):
@ -17,7 +19,13 @@ async def run_pipeline(ontology_path=None):
await cognee.add(scientific_papers_dir)
pipeline_run = await cognee.cognify(ontology_file_path=ontology_path)
config: Config = {
"ontology_config": {
"ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
}
}
pipeline_run = await cognee.cognify(config=config)
return pipeline_run

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
[project]
name = "cognee"
version = "0.3.4.dev4"
version = "0.3.4"
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
authors = [
{ name = "Vasilije Markovic" },
@ -64,14 +64,13 @@ dependencies = [
"pylance>=0.22.0,<1.0.0",
"kuzu (==0.11.0)",
"python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
]
[project.optional-dependencies]
api = [
"uvicorn>=0.34.0,<1.0.0",
"gunicorn>=20.1.0,<24",
"websockets>=15.0.1,<16.0.0"
]
[project.optional-dependencies]
api=[]
distributed = [
"modal>=1.0.5,<2.0.0",
]

30
uv.lock generated
View file

@ -852,7 +852,7 @@ wheels = [
[[package]]
name = "cognee"
version = "0.3.4.dev4"
version = "0.3.4"
source = { editable = "." }
dependencies = [
{ name = "aiofiles" },
@ -864,6 +864,7 @@ dependencies = [
{ name = "fastapi" },
{ name = "fastapi-users", extra = ["sqlalchemy"] },
{ name = "filetype" },
{ name = "gunicorn" },
{ name = "instructor" },
{ name = "jinja2" },
{ name = "kuzu" },
@ -899,17 +900,14 @@ dependencies = [
{ name = "tiktoken" },
{ name = "typing-extensions" },
{ name = "unstructured", extra = ["pdf"] },
{ name = "uvicorn" },
{ name = "websockets" },
]
[package.optional-dependencies]
anthropic = [
{ name = "anthropic" },
]
api = [
{ name = "gunicorn" },
{ name = "uvicorn" },
{ name = "websockets" },
]
aws = [
{ name = "s3fs", extra = ["boto3"] },
]
@ -1036,7 +1034,7 @@ requires-dist = [
{ name = "google-generativeai", marker = "extra == 'gemini'", specifier = ">=0.8.4,<0.9" },
{ name = "graphiti-core", marker = "extra == 'graphiti'", specifier = ">=0.7.0,<0.8" },
{ name = "groq", marker = "extra == 'groq'", specifier = ">=0.8.0,<1.0.0" },
{ name = "gunicorn", marker = "extra == 'api'", specifier = ">=20.1.0,<24" },
{ name = "gunicorn", specifier = ">=20.1.0,<24" },
{ name = "instructor", specifier = ">=1.9.1,<2.0.0" },
{ name = "jinja2", specifier = ">=3.1.3,<4" },
{ name = "kuzu", specifier = "==0.11.0" },
@ -1105,8 +1103,8 @@ requires-dist = [
{ name = "typing-extensions", specifier = ">=4.12.2,<5.0.0" },
{ name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" },
{ name = "unstructured", extras = ["pdf"], specifier = ">=0.18.1,<19" },
{ name = "uvicorn", marker = "extra == 'api'", specifier = ">=0.34.0,<1.0.0" },
{ name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1,<16.0.0" },
{ name = "uvicorn", specifier = ">=0.34.0,<1.0.0" },
{ name = "websockets", specifier = ">=15.0.1,<16.0.0" },
]
provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]
@ -4706,7 +4704,7 @@ name = "nvidia-cudnn-cu12"
version = "9.10.2.21"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" },
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
@ -4717,7 +4715,7 @@ name = "nvidia-cufft-cu12"
version = "11.3.3.83"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
@ -4744,9 +4742,9 @@ name = "nvidia-cusolver-cu12"
version = "11.7.3.90"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" },
{ name = "nvidia-cusparse-cu12", marker = "sys_platform != 'emscripten'" },
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
{ name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
@ -4757,7 +4755,7 @@ name = "nvidia-cusparse-cu12"
version = "12.5.8.93"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
@ -8204,7 +8202,7 @@ name = "triton"
version = "3.4.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "setuptools", marker = "sys_platform != 'emscripten'" },
{ name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
]
wheels = [
{ url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" },