Merge branch 'dev' into feat/add-pdfproloader
Signed-off-by: EricXiao <taoiaox@gmail.com>
This commit is contained in:
commit
d12ec0bc4f
56 changed files with 1303 additions and 1126 deletions
|
|
@ -116,7 +116,15 @@ VECTOR_DB_PROVIDER="lancedb"
|
|||
VECTOR_DB_URL=
|
||||
VECTOR_DB_KEY=
|
||||
|
||||
################################################################################
|
||||
# 🧩 Ontology resolver settings
|
||||
################################################################################
|
||||
|
||||
# -- Ontology resolver params --------------------------------------
|
||||
# ONTOLOGY_RESOLVER=rdflib # Default: uses rdflib and owl file to read ontology structures
|
||||
# MATCHING_STRATEGY=fuzzy # Default: uses fuzzy matching with 80% similarity threshold
|
||||
# ONTOLOGY_FILE_PATH=YOUR_FULL_FILE_PATH # Default: empty
|
||||
# To add ontology resolvers, either configure them as shown in ontology_example or provide the full file path and settings as environment variables.
|
||||
|
||||
################################################################################
|
||||
# 🔄 MIGRATION (RELATIONAL → GRAPH) SETTINGS
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ WORKFLOWS=(
|
|||
"test_kuzu.yml"
|
||||
"test_multimetric_qa_eval_run.yaml"
|
||||
"test_graphrag_vs_rag_notebook.yml"
|
||||
"test_gemini.yml"
|
||||
"test_llms.yml"
|
||||
"test_multimedia_example.yaml"
|
||||
"test_deduplication.yml"
|
||||
"test_eval_framework.yml"
|
||||
|
|
|
|||
29
.github/workflows/test_gemini.yml
vendored
29
.github/workflows/test_gemini.yml
vendored
|
|
@ -1,29 +0,0 @@
|
|||
name: test | gemini
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
jobs:
|
||||
test-gemini:
|
||||
name: Run Gemini Test
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Run Gemini Simple Example
|
||||
env:
|
||||
LLM_PROVIDER: "gemini"
|
||||
LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
LLM_MODEL: "gemini/gemini-1.5-flash"
|
||||
EMBEDDING_PROVIDER: "gemini"
|
||||
EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||
EMBEDDING_MODEL: "gemini/text-embedding-004"
|
||||
EMBEDDING_DIMENSIONS: "768"
|
||||
EMBEDDING_MAX_TOKENS: "8076"
|
||||
run: uv run python ./examples/python/simple_example.py
|
||||
86
.github/workflows/test_llms.yml
vendored
Normal file
86
.github/workflows/test_llms.yml
vendored
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
# Reusable workflow that groups the LLM/embedding provider smoke tests.
# It has no triggers of its own: it only runs when another workflow calls it.
name: LLM Test Suites

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

on:
  workflow_call:

# Environment shared by every job in this workflow.
env:
  RUNTIME__LOG_LEVEL: ERROR
  ENV: 'dev'

# Each job follows the same three steps: check out the repository, run the
# local cognee_setup action with Python 3.11.x, then run the simple example
# with provider-specific LLM_* / EMBEDDING_* environment variables.
jobs:
  # Gemini for both the LLM and the embeddings.
  test-gemini:
    name: Run Gemini Test
    runs-on: ubuntu-22.04
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: '3.11.x'

      - name: Run Gemini Simple Example
        env:
          LLM_PROVIDER: "gemini"
          LLM_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          LLM_MODEL: "gemini/gemini-1.5-flash"
          EMBEDDING_PROVIDER: "gemini"
          EMBEDDING_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          EMBEDDING_MODEL: "gemini/text-embedding-004"
          EMBEDDING_DIMENSIONS: "768"
          EMBEDDING_MAX_TOKENS: "8076"
        run: uv run python ./examples/python/simple_example.py

  # OpenAI-provider LLM (configured entirely from secrets) with fastembed embeddings.
  test-fastembed:
    name: Run Fastembed Test
    runs-on: ubuntu-22.04
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: '3.11.x'

      - name: Run Fastembed Simple Example
        env:
          LLM_PROVIDER: "openai"
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
          LLM_MODEL: ${{ secrets.LLM_MODEL }}
          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
          EMBEDDING_PROVIDER: "fastembed"
          EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
          EMBEDDING_DIMENSIONS: "384"
          EMBEDDING_MAX_TOKENS: "256"
        run: uv run python ./examples/python/simple_example.py

  # OpenRouter endpoint via the "custom" LLM provider, with OpenAI embeddings.
  test-openrouter:
    name: Run OpenRouter Test
    runs-on: ubuntu-22.04
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Cognee Setup
        uses: ./.github/actions/cognee_setup
        with:
          python-version: '3.11.x'

      - name: Run OpenRouter Simple Example
        env:
          LLM_PROVIDER: "custom"
          LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
          LLM_MODEL: "openrouter/x-ai/grok-code-fast-1"
          LLM_ENDPOINT: "https://openrouter.ai/api/v1"
          EMBEDDING_PROVIDER: "openai"
          EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          EMBEDDING_MODEL: "openai/text-embedding-3-large"
          EMBEDDING_DIMENSIONS: "3072"
          EMBEDDING_MAX_TOKENS: "8191"
        run: uv run python ./examples/python/simple_example.py
|
||||
30
.github/workflows/test_openrouter.yml
vendored
30
.github/workflows/test_openrouter.yml
vendored
|
|
@ -1,30 +0,0 @@
|
|||
name: test | openrouter
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
jobs:
|
||||
test-openrouter:
|
||||
name: Run OpenRouter Test
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Check out repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Cognee Setup
|
||||
uses: ./.github/actions/cognee_setup
|
||||
with:
|
||||
python-version: '3.11.x'
|
||||
|
||||
- name: Run OpenRouter Simple Example
|
||||
env:
|
||||
LLM_PROVIDER: "custom"
|
||||
LLM_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
|
||||
LLM_MODEL: "openrouter/x-ai/grok-code-fast-1"
|
||||
LLM_ENDPOINT: "https://openrouter.ai/api/v1"
|
||||
EMBEDDING_PROVIDER: "openai"
|
||||
EMBEDDING_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
EMBEDDING_MODEL: "openai/text-embedding-3-large"
|
||||
EMBEDDING_DIMENSIONS: "3072"
|
||||
EMBEDDING_MAX_TOKENS: "8191"
|
||||
run: uv run python ./examples/python/simple_example.py
|
||||
23
.github/workflows/test_suites.yml
vendored
23
.github/workflows/test_suites.yml
vendored
|
|
@ -115,16 +115,10 @@ jobs:
|
|||
secrets: inherit
|
||||
|
||||
# Additional LLM tests
|
||||
gemini-tests:
|
||||
name: Gemini Tests
|
||||
needs: [basic-tests, e2e-tests]
|
||||
uses: ./.github/workflows/test_gemini.yml
|
||||
secrets: inherit
|
||||
|
||||
openrouter-tests:
|
||||
name: OpenRouter Tests
|
||||
needs: [basic-tests, e2e-tests]
|
||||
uses: ./.github/workflows/test_openrouter.yml
|
||||
llm-tests:
|
||||
name: LLM Test Suite
|
||||
needs: [ basic-tests, e2e-tests ]
|
||||
uses: ./.github/workflows/test_llms.yml
|
||||
secrets: inherit
|
||||
|
||||
# Ollama tests moved to the end
|
||||
|
|
@ -138,8 +132,7 @@ jobs:
|
|||
different-operating-systems-tests,
|
||||
vector-db-tests,
|
||||
example-tests,
|
||||
gemini-tests,
|
||||
openrouter-tests,
|
||||
llm-tests,
|
||||
mcp-test,
|
||||
relational-db-migration-tests,
|
||||
docker-compose-test,
|
||||
|
|
@ -161,8 +154,7 @@ jobs:
|
|||
example-tests,
|
||||
db-examples-tests,
|
||||
mcp-test,
|
||||
gemini-tests,
|
||||
openrouter-tests,
|
||||
llm-tests,
|
||||
ollama-tests,
|
||||
relational-db-migration-tests,
|
||||
docker-compose-test,
|
||||
|
|
@ -183,8 +175,7 @@ jobs:
|
|||
"${{ needs.example-tests.result }}" == "success" &&
|
||||
"${{ needs.db-examples-tests.result }}" == "success" &&
|
||||
"${{ needs.relational-db-migration-tests.result }}" == "success" &&
|
||||
"${{ needs.gemini-tests.result }}" == "success" &&
|
||||
"${{ needs.openrouter-tests.result }}" == "success" &&
|
||||
"${{ needs.llm-tests.result }}" == "success" &&
|
||||
"${{ needs.docker-compose-test.result }}" == "success" &&
|
||||
"${{ needs.docker-ci-test.result }}" == "success" &&
|
||||
"${{ needs.ollama-tests.result }}" == "success" ]]; then
|
||||
|
|
|
|||
10
README.md
10
README.md
|
|
@ -176,16 +176,6 @@ You can also cognify your files and query using cognee UI.
|
|||
|
||||
<img src="assets/cognee-new-ui.webp" width="100%" alt="Cognee UI 2"></a>
|
||||
|
||||
### Installation for UI
|
||||
|
||||
To use the cognee UI with full functionality, you need to install cognee with API dependencies:
|
||||
|
||||
```bash
|
||||
pip install 'cognee[api]'
|
||||
```
|
||||
|
||||
The UI requires backend server functionality (uvicorn and other API dependencies) which are not included in the default cognee installation to keep it lightweight.
|
||||
|
||||
### Running the UI
|
||||
|
||||
Try cognee UI by running ``` cognee-cli -ui ``` command on your terminal.
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from pydantic import BaseModel
|
|||
from typing import Union, Optional
|
||||
from uuid import UUID
|
||||
|
||||
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.infrastructure.llm import get_max_chunk_tokens
|
||||
|
|
@ -10,7 +11,11 @@ from cognee.infrastructure.llm import get_max_chunk_tokens
|
|||
from cognee.modules.pipelines import run_pipeline
|
||||
from cognee.modules.pipelines.tasks.task import Task
|
||||
from cognee.modules.chunking.TextChunker import TextChunker
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.ontology_config import Config
|
||||
from cognee.modules.ontology.get_default_ontology_resolver import (
|
||||
get_default_ontology_resolver,
|
||||
get_ontology_resolver_from_env,
|
||||
)
|
||||
from cognee.modules.users.models import User
|
||||
|
||||
from cognee.tasks.documents import (
|
||||
|
|
@ -39,7 +44,7 @@ async def cognify(
|
|||
graph_model: BaseModel = KnowledgeGraph,
|
||||
chunker=TextChunker,
|
||||
chunk_size: int = None,
|
||||
ontology_file_path: Optional[str] = None,
|
||||
config: Config = None,
|
||||
vector_db_config: dict = None,
|
||||
graph_db_config: dict = None,
|
||||
run_in_background: bool = False,
|
||||
|
|
@ -100,8 +105,6 @@ async def cognify(
|
|||
Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
|
||||
Default limits: ~512-8192 tokens depending on models.
|
||||
Smaller chunks = more granular but potentially fragmented knowledge.
|
||||
ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.
|
||||
Useful for specialized fields like medical or legal documents.
|
||||
vector_db_config: Custom vector database configuration for embeddings storage.
|
||||
graph_db_config: Custom graph database configuration for relationship storage.
|
||||
run_in_background: If True, starts processing asynchronously and returns immediately.
|
||||
|
|
@ -188,11 +191,28 @@ async def cognify(
|
|||
- LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
|
||||
- LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
|
||||
"""
|
||||
if config is None:
|
||||
ontology_config = get_ontology_env_config()
|
||||
if (
|
||||
ontology_config.ontology_file_path
|
||||
and ontology_config.ontology_resolver
|
||||
and ontology_config.matching_strategy
|
||||
):
|
||||
config: Config = {
|
||||
"ontology_config": {
|
||||
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
|
||||
}
|
||||
}
|
||||
else:
|
||||
config: Config = {
|
||||
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
|
||||
}
|
||||
|
||||
if temporal_cognify:
|
||||
tasks = await get_temporal_tasks(user, chunker, chunk_size)
|
||||
else:
|
||||
tasks = await get_default_tasks(
|
||||
user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
|
||||
user, graph_model, chunker, chunk_size, config, custom_prompt
|
||||
)
|
||||
|
||||
# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
|
||||
|
|
@ -216,9 +236,26 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
|
|||
graph_model: BaseModel = KnowledgeGraph,
|
||||
chunker=TextChunker,
|
||||
chunk_size: int = None,
|
||||
ontology_file_path: Optional[str] = None,
|
||||
config: Config = None,
|
||||
custom_prompt: Optional[str] = None,
|
||||
) -> list[Task]:
|
||||
if config is None:
|
||||
ontology_config = get_ontology_env_config()
|
||||
if (
|
||||
ontology_config.ontology_file_path
|
||||
and ontology_config.ontology_resolver
|
||||
and ontology_config.matching_strategy
|
||||
):
|
||||
config: Config = {
|
||||
"ontology_config": {
|
||||
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
|
||||
}
|
||||
}
|
||||
else:
|
||||
config: Config = {
|
||||
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
|
||||
}
|
||||
|
||||
default_tasks = [
|
||||
Task(classify_documents),
|
||||
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
|
||||
|
|
@ -230,7 +267,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
|
|||
Task(
|
||||
extract_graph_from_data,
|
||||
graph_model=graph_model,
|
||||
ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
|
||||
config=config,
|
||||
custom_prompt=custom_prompt,
|
||||
task_config={"batch_size": 10},
|
||||
), # Generate knowledge graphs from the document chunks.
|
||||
|
|
|
|||
|
|
@ -82,6 +82,9 @@ async def search(
|
|||
Best for: General-purpose queries or when you're unsure which search type is best.
|
||||
Returns: The results from the automatically selected search type.
|
||||
|
||||
**CHUNKS_LEXICAL**:
|
||||
Token-based lexical chunk search (e.g., Jaccard). Best for: exact-term matching, stopword-aware lookups.
|
||||
Returns: Ranked text chunks (optionally with scores).
|
||||
|
||||
Args:
|
||||
query_text: Your question or search query in natural language.
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ class BaseConfig(BaseSettings):
|
|||
data_root_directory: str = get_absolute_path(".data_storage")
|
||||
system_root_directory: str = get_absolute_path(".cognee_system")
|
||||
cache_root_directory: str = get_absolute_path(".cognee_cache")
|
||||
monitoring_tool: object = Observer.LANGFUSE
|
||||
monitoring_tool: object = Observer.NONE
|
||||
|
||||
@pydantic.model_validator(mode="after")
|
||||
def validate_paths(self):
|
||||
|
|
@ -30,7 +30,10 @@ class BaseConfig(BaseSettings):
|
|||
# Require absolute paths for root directories
|
||||
self.data_root_directory = ensure_absolute_path(self.data_root_directory)
|
||||
self.system_root_directory = ensure_absolute_path(self.system_root_directory)
|
||||
self.cache_root_directory = ensure_absolute_path(self.cache_root_directory)
|
||||
# Set monitoring tool based on available keys
|
||||
if self.langfuse_public_key and self.langfuse_secret_key:
|
||||
self.monitoring_tool = Observer.LANGFUSE
|
||||
|
||||
return self
|
||||
|
||||
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
|
|||
from cognee.tasks.graph import extract_graph_from_data
|
||||
from cognee.tasks.storage import add_data_points
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
|
||||
|
||||
async def get_default_tasks_by_indices(
|
||||
|
|
@ -33,7 +33,7 @@ async def get_no_summary_tasks(
|
|||
# Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
|
||||
base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
|
||||
|
||||
ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
|
||||
ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
|
||||
|
||||
graph_task = Task(
|
||||
extract_graph_from_data,
|
||||
|
|
|
|||
|
|
@ -6,6 +6,15 @@ from .create_dataset import create_dataset
|
|||
|
||||
|
||||
async def create_authorized_dataset(dataset_name: str, user: User) -> Dataset:
|
||||
"""
|
||||
Create a new dataset and give all permissions on this dataset to the given user.
|
||||
Args:
|
||||
dataset_name: Name of the dataset.
|
||||
user: The user object.
|
||||
|
||||
Returns:
|
||||
Dataset: The new authorized dataset.
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ async def get_authorized_dataset(
|
|||
Get a specific dataset with permissions for a user.
|
||||
|
||||
Args:
|
||||
user_id (UUID): user id
|
||||
user: User object
|
||||
dataset_id (UUID): dataset id
|
||||
permission_type (str): permission type(read, write, delete, share), default is read
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,17 @@ from ..models import Dataset
|
|||
async def get_authorized_dataset_by_name(
    dataset_name: str, user: User, permission_type: str
) -> Optional[Dataset]:
    """
    Look up a dataset by name among the datasets the user is authorized for.

    Args:
        dataset_name: Name of the dataset to find.
        user: User object whose permissions are checked.
        permission_type (str): permission type to require (read, write, delete, share)

    Returns:
        Optional[Dataset]: the first authorized dataset whose name matches,
            or None when no authorized dataset has that name
    """
    candidates = await get_authorized_existing_datasets([], permission_type, user)

    # Return the first name match; fall through to None when nothing matches.
    for candidate in candidates:
        if candidate.name == dataset_name:
            return candidate
    return None
|
||||
|
|
|
|||
|
|
@ -7,8 +7,14 @@ from cognee.modules.engine.utils import (
|
|||
generate_node_id,
|
||||
generate_node_name,
|
||||
)
|
||||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.get_default_ontology_resolver import (
|
||||
get_default_ontology_resolver,
|
||||
get_ontology_resolver_from_env,
|
||||
)
|
||||
|
||||
|
||||
def _create_node_key(node_id: str, category: str) -> str:
|
||||
|
|
@ -83,7 +89,7 @@ def _process_ontology_edges(
|
|||
|
||||
def _create_type_node(
|
||||
node_type: str,
|
||||
ontology_resolver: OntologyResolver,
|
||||
ontology_resolver: RDFLibOntologyResolver,
|
||||
added_nodes_map: dict,
|
||||
added_ontology_nodes_map: dict,
|
||||
name_mapping: dict,
|
||||
|
|
@ -141,7 +147,7 @@ def _create_entity_node(
|
|||
node_name: str,
|
||||
node_description: str,
|
||||
type_node: EntityType,
|
||||
ontology_resolver: OntologyResolver,
|
||||
ontology_resolver: RDFLibOntologyResolver,
|
||||
added_nodes_map: dict,
|
||||
added_ontology_nodes_map: dict,
|
||||
name_mapping: dict,
|
||||
|
|
@ -198,7 +204,7 @@ def _create_entity_node(
|
|||
def _process_graph_nodes(
|
||||
data_chunk: DocumentChunk,
|
||||
graph: KnowledgeGraph,
|
||||
ontology_resolver: OntologyResolver,
|
||||
ontology_resolver: RDFLibOntologyResolver,
|
||||
added_nodes_map: dict,
|
||||
added_ontology_nodes_map: dict,
|
||||
name_mapping: dict,
|
||||
|
|
@ -277,7 +283,7 @@ def _process_graph_edges(
|
|||
def expand_with_nodes_and_edges(
|
||||
data_chunks: list[DocumentChunk],
|
||||
chunk_graphs: list[KnowledgeGraph],
|
||||
ontology_resolver: OntologyResolver = None,
|
||||
ontology_resolver: BaseOntologyResolver = None,
|
||||
existing_edges_map: Optional[dict[str, bool]] = None,
|
||||
):
|
||||
"""
|
||||
|
|
@ -296,8 +302,8 @@ def expand_with_nodes_and_edges(
|
|||
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
|
||||
data chunk. Each graph contains nodes (entities) and edges (relationships) extracted
|
||||
from the chunk content.
|
||||
ontology_resolver (OntologyResolver, optional): Resolver for validating entities and
|
||||
types against an ontology. If None, a default OntologyResolver is created.
|
||||
ontology_resolver (BaseOntologyResolver, optional): Resolver for validating entities and
|
||||
types against an ontology. If None, a default RDFLibOntologyResolver is created.
|
||||
Defaults to None.
|
||||
existing_edges_map (dict[str, bool], optional): Mapping of existing edge keys to prevent
|
||||
duplicate edge creation. Keys are formatted as "{source_id}_{target_id}_{relation}".
|
||||
|
|
@ -320,7 +326,15 @@ def expand_with_nodes_and_edges(
|
|||
existing_edges_map = {}
|
||||
|
||||
if ontology_resolver is None:
|
||||
ontology_resolver = OntologyResolver()
|
||||
ontology_config = get_ontology_env_config()
|
||||
if (
|
||||
ontology_config.ontology_file_path
|
||||
and ontology_config.ontology_resolver
|
||||
and ontology_config.matching_strategy
|
||||
):
|
||||
ontology_resolver = get_ontology_resolver_from_env(**ontology_config.to_dict())
|
||||
else:
|
||||
ontology_resolver = get_default_ontology_resolver()
|
||||
|
||||
added_nodes_map = {}
|
||||
added_ontology_nodes_map = {}
|
||||
|
|
|
|||
|
|
@ -23,8 +23,6 @@ async def retrieve_existing_edges(
|
|||
chunk_graphs (list[KnowledgeGraph]): List of knowledge graphs corresponding to each
|
||||
data chunk. Each graph contains nodes (entities) and edges (relationships) that
|
||||
were extracted from the chunk content.
|
||||
graph_engine (GraphDBInterface): Interface to the graph database that will be queried
|
||||
to check for existing edges. Must implement the has_edges() method.
|
||||
|
||||
Returns:
|
||||
dict[str, bool]: A mapping of edge keys to boolean values indicating existence.
|
||||
|
|
|
|||
|
|
@ -9,3 +9,17 @@ def get_observe():
|
|||
from langfuse.decorators import observe
|
||||
|
||||
return observe
|
||||
elif monitoring == Observer.NONE:
|
||||
# Return a no-op decorator that handles keyword arguments
|
||||
def no_op_decorator(*args, **kwargs):
|
||||
if len(args) == 1 and callable(args[0]) and not kwargs:
|
||||
# Direct decoration: @observe
|
||||
return args[0]
|
||||
else:
|
||||
# Parameterized decoration: @observe(as_type="generation")
|
||||
def decorator(func):
|
||||
return func
|
||||
|
||||
return decorator
|
||||
|
||||
return no_op_decorator
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from enum import Enum
|
|||
class Observer(str, Enum):
|
||||
"""Monitoring tools"""
|
||||
|
||||
NONE = "none"
|
||||
LANGFUSE = "langfuse"
|
||||
LLMLITE = "llmlite"
|
||||
LANGSMITH = "langsmith"
|
||||
|
|
|
|||
42
cognee/modules/ontology/base_ontology_resolver.py
Normal file
42
cognee/modules/ontology/base_ontology_resolver.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
|
||||
|
||||
|
||||
class BaseOntologyResolver(ABC):
    """Interface that every ontology resolver has to implement.

    The base class only stores the matching strategy; lookup construction,
    matching and subgraph extraction are left to concrete subclasses.
    """

    def __init__(self, matching_strategy: Optional[MatchingStrategy] = None):
        """Store the strategy used for entity matching.

        Args:
            matching_strategy: Strategy used to match entity names. When it is
                None (or otherwise falsy) a new FuzzyMatchingStrategy is used.
        """
        if matching_strategy:
            self.matching_strategy = matching_strategy
        else:
            self.matching_strategy = FuzzyMatchingStrategy()

    @abstractmethod
    def build_lookup(self) -> None:
        """Construct the lookup dictionary of ontology entities."""

    @abstractmethod
    def refresh_lookup(self) -> None:
        """Bring the lookup dictionary up to date."""

    @abstractmethod
    def find_closest_match(self, name: str, category: str) -> Optional[str]:
        """Return the closest match for ``name`` within ``category``, if any."""

    @abstractmethod
    def get_subgraph(
        self, node_name: str, node_type: str = "individuals", directed: bool = True
    ) -> Tuple[
        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
    ]:
        """Return the subgraph for the given node.

        The result is a 3-tuple: a list of nodes, a list of string triples,
        and an optional single node.
        """
|
||||
41
cognee/modules/ontology/get_default_ontology_resolver.py
Normal file
41
cognee/modules/ontology/get_default_ontology_resolver.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
|
||||
|
||||
|
||||
def get_default_ontology_resolver() -> BaseOntologyResolver:
    """Build the fallback resolver: RDFLib-based, fuzzy matching, no ontology file."""
    fuzzy_strategy = FuzzyMatchingStrategy()
    return RDFLibOntologyResolver(matching_strategy=fuzzy_strategy, ontology_file=None)
|
||||
|
||||
|
||||
def get_ontology_resolver_from_env(
    ontology_resolver: str = "", matching_strategy: str = "", ontology_file_path: str = ""
) -> BaseOntologyResolver:
    """
    Create and return an ontology resolver instance based on environment parameters.

    Currently, this function supports only the RDFLib-based ontology resolver
    with a fuzzy matching strategy.

    Args:
        ontology_resolver (str): The ontology resolver type to use.
            Supported value: "rdflib".
        matching_strategy (str): The matching strategy to apply.
            Supported value: "fuzzy".
        ontology_file_path (str): Path to the ontology file required for the resolver.

    Returns:
        BaseOntologyResolver: An instance of the requested ontology resolver.

    Raises:
        EnvironmentError: If the provided resolver or strategy is unsupported,
            or if the ontology file path is missing. The message names the
            parameter that was rejected.
    """
    # Each parameter is checked separately so the error identifies the actual
    # cause instead of always blaming the resolver.
    if ontology_resolver != "rdflib":
        raise EnvironmentError(
            f"Unsupported ontology resolver: {ontology_resolver}. "
            f"Supported resolvers are: RdfLib with FuzzyMatchingStrategy."
        )

    if matching_strategy != "fuzzy":
        raise EnvironmentError(
            f"Unsupported matching strategy: {matching_strategy}. "
            f"Supported matching strategies are: fuzzy."
        )

    if not ontology_file_path:
        raise EnvironmentError(
            "Ontology file path is missing: an ontology file path is required "
            "to create an ontology resolver from the environment."
        )

    return RDFLibOntologyResolver(
        matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
    )
|
||||
53
cognee/modules/ontology/matching_strategies.py
Normal file
53
cognee/modules/ontology/matching_strategies.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import difflib
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class MatchingStrategy(ABC):
    """Contract for strategies that match a name against ontology entity names."""

    @abstractmethod
    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Pick the best candidate for ``name``.

        Args:
            name: The name to look up.
            candidates: Candidate names to compare against.

        Returns:
            The best matching candidate name, or None when nothing matches.
        """
|
||||
|
||||
|
||||
class FuzzyMatchingStrategy(MatchingStrategy):
    """Fuzzy matching strategy using difflib for approximate string matching."""

    def __init__(self, cutoff: float = 0.8):
        """Initialize fuzzy matching strategy.

        Args:
            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid

        Raises:
            ValueError: If cutoff is outside the range [0.0, 1.0].
        """
        # difflib.get_close_matches rejects an out-of-range cutoff, but only
        # when it is actually reached (not for empty candidates or exact
        # matches). Validating here makes the failure immediate and consistent.
        if not 0.0 <= cutoff <= 1.0:
            raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}")
        self.cutoff = cutoff

    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Find the closest fuzzy match for a given name.

        Args:
            name: The normalized name to match
            candidates: List of normalized candidate names

        Returns:
            The best matching candidate name, or None if no match meets the cutoff
        """
        if not candidates:
            return None

        # An exact match short-circuits the fuzzy comparison.
        if name in candidates:
            return name

        # n=1 asks difflib for at most one result, the best-scoring candidate.
        best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
        return best_match[0] if best_match else None
|
||||
20
cognee/modules/ontology/models.py
Normal file
20
cognee/modules/ontology/models.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
from typing import Any
|
||||
|
||||
|
||||
class AttachedOntologyNode:
    """Small wrapper that exposes any ontology node through a uniform cognee interface.

    Holds the original URI, a short name derived from it, and the node category.
    """

    def __init__(self, uri: Any, category: str):
        self.uri = uri
        self.name = self._extract_name(uri)
        self.category = category

    @staticmethod
    def _extract_name(uri: Any) -> str:
        """Derive a short name from the string form of ``uri``.

        If the string contains "#", the text after the last "#" is returned.
        Otherwise trailing slashes are dropped and the last "/"-separated
        segment is returned.
        """
        text = str(uri)
        if "#" not in text:
            return text.rstrip("/").rpartition("/")[2]
        return text.rpartition("#")[2]

    def __repr__(self):
        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
|
||||
24
cognee/modules/ontology/ontology_config.py
Normal file
24
cognee/modules/ontology/ontology_config.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
from typing import TypedDict, Optional
|
||||
|
||||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||
from cognee.modules.ontology.matching_strategies import MatchingStrategy
|
||||
|
||||
|
||||
class OntologyConfig(TypedDict, total=False):
    """Configuration containing ontology resolver.

    Attributes:
        ontology_resolver: The ontology resolver instance to use
    """

    # total=False makes this key optional; when present, it may also be None.
    ontology_resolver: Optional[BaseOntologyResolver]
|
||||
|
||||
|
||||
class Config(TypedDict, total=False):
    """Top-level configuration dictionary.

    Attributes:
        ontology_config: Configuration containing ontology resolver
    """

    # total=False makes this key optional; when present, it may also be None.
    ontology_config: Optional[OntologyConfig]
|
||||
45
cognee/modules/ontology/ontology_env_config.py
Normal file
45
cognee/modules/ontology/ontology_env_config.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
"""This module contains the configuration for ontology handling."""
|
||||
|
||||
from functools import lru_cache
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class OntologyEnvConfig(BaseSettings):
    """
    Settings for ontology handling: which resolver to use, which matching
    strategy to apply, and where the ontology file lives. The settings are
    configured to read from a ".env" file.

    Public methods:
    - to_dict

    Instance variables:
    - ontology_resolver
    - matching_strategy
    - ontology_file_path
    - model_config
    """

    ontology_resolver: str = "rdflib"
    matching_strategy: str = "fuzzy"
    ontology_file_path: str = ""

    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)

    def to_dict(self) -> dict:
        """
        Export the three ontology settings as a plain dictionary keyed by field name.
        """
        exported_fields = ("ontology_resolver", "matching_strategy", "ontology_file_path")
        return {field_name: getattr(self, field_name) for field_name in exported_fields}
|
||||
|
||||
|
||||
@lru_cache
def get_ontology_env_config():
    """
    Return the ontology environment settings.

    The result is memoized with lru_cache, so every call returns the same
    OntologyEnvConfig instance that was created on the first call.
    """
    env_settings = OntologyEnvConfig()
    return env_settings
|
||||
|
|
@ -10,31 +10,26 @@ from cognee.modules.ontology.exceptions import (
|
|||
FindClosestMatchError,
|
||||
GetSubgraphError,
|
||||
)
|
||||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
|
||||
|
||||
logger = get_logger("OntologyAdapter")
|
||||
|
||||
|
||||
class AttachedOntologyNode:
|
||||
"""Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
|
||||
class RDFLibOntologyResolver(BaseOntologyResolver):
|
||||
"""RDFLib-based ontology resolver implementation.
|
||||
|
||||
def __init__(self, uri: URIRef, category: str):
|
||||
self.uri = uri
|
||||
self.name = self._extract_name(uri)
|
||||
self.category = category
|
||||
This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
|
||||
It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _extract_name(uri: URIRef) -> str:
|
||||
uri_str = str(uri)
|
||||
if "#" in uri_str:
|
||||
return uri_str.split("#")[-1]
|
||||
return uri_str.rstrip("/").split("/")[-1]
|
||||
|
||||
def __repr__(self):
|
||||
return f"AttachedOntologyNode(name={self.name}, category={self.category})"
|
||||
|
||||
|
||||
class OntologyResolver:
|
||||
def __init__(self, ontology_file: Optional[str] = None):
|
||||
def __init__(
|
||||
self,
|
||||
ontology_file: Optional[str] = None,
|
||||
matching_strategy: Optional[MatchingStrategy] = None,
|
||||
) -> None:
|
||||
super().__init__(matching_strategy)
|
||||
self.ontology_file = ontology_file
|
||||
try:
|
||||
if ontology_file and os.path.exists(ontology_file):
|
||||
|
|
@ -60,7 +55,7 @@ class OntologyResolver:
|
|||
name = uri_str.rstrip("/").split("/")[-1]
|
||||
return name.lower().replace(" ", "_").strip()
|
||||
|
||||
def build_lookup(self):
|
||||
def build_lookup(self) -> None:
|
||||
try:
|
||||
classes: Dict[str, URIRef] = {}
|
||||
individuals: Dict[str, URIRef] = {}
|
||||
|
|
@ -97,7 +92,7 @@ class OntologyResolver:
|
|||
logger.error("Failed to build lookup dictionary: %s", str(e))
|
||||
raise RuntimeError("Lookup build failed") from e
|
||||
|
||||
def refresh_lookup(self):
|
||||
def refresh_lookup(self) -> None:
|
||||
self.build_lookup()
|
||||
logger.info("Ontology lookup refreshed.")
|
||||
|
||||
|
|
@ -105,13 +100,8 @@ class OntologyResolver:
|
|||
try:
|
||||
normalized_name = name.lower().replace(" ", "_").strip()
|
||||
possible_matches = list(self.lookup.get(category, {}).keys())
|
||||
if normalized_name in possible_matches:
|
||||
return normalized_name
|
||||
|
||||
best_match = difflib.get_close_matches(
|
||||
normalized_name, possible_matches, n=1, cutoff=0.8
|
||||
)
|
||||
return best_match[0] if best_match else None
|
||||
return self.matching_strategy.find_match(normalized_name, possible_matches)
|
||||
except Exception as e:
|
||||
logger.error("Error in find_closest_match: %s", str(e))
|
||||
raise FindClosestMatchError() from e
|
||||
|
|
@ -125,7 +115,9 @@ class OntologyResolver:
|
|||
|
||||
def get_subgraph(
|
||||
self, node_name: str, node_type: str = "individuals", directed: bool = True
|
||||
) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]:
|
||||
) -> Tuple[
|
||||
List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
|
||||
]:
|
||||
nodes_set = set()
|
||||
edges: List[Tuple[str, str, str]] = []
|
||||
visited = set()
|
||||
|
|
@ -11,6 +11,19 @@ from cognee.modules.data.methods import (
|
|||
|
||||
|
||||
async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
|
||||
"""
|
||||
Handles dataset authorization if the dataset already exists in Cognee, and dataset creation otherwise.
|
||||
Verifies that provided user has necessary permission for provided Dataset.
|
||||
If Dataset does not exist creates the Dataset and gives permission for the user creating the dataset.
|
||||
|
||||
Args:
|
||||
dataset_id: Id of the dataset.
|
||||
dataset_name: Name of the dataset.
|
||||
user: Cognee User request is being processed for, if None default user will be used.
|
||||
|
||||
Returns:
|
||||
Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
|
||||
"""
|
||||
if not user:
|
||||
user = await get_default_user()
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets(
|
|||
datasets: Dataset names or Dataset UUID (in case Datasets already exist)
|
||||
|
||||
Returns:
|
||||
|
||||
Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets.
|
||||
"""
|
||||
# If no user is provided use default user
|
||||
if user is None:
|
||||
|
|
|
|||
56
cognee/modules/retrieval/jaccard_retrival.py
Normal file
56
cognee/modules/retrieval/jaccard_retrival.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
from cognee.modules.retrieval.lexical_retriever import LexicalRetriever
|
||||
import re
|
||||
from collections import Counter
|
||||
from typing import Optional
|
||||
class JaccardChunksRetriever(LexicalRetriever):
    """
    LexicalRetriever specialization that scores chunks by Jaccard similarity.

    Tokenization and scoring are supplied to the base class as the bound
    methods ``_tokenizer`` and ``_scorer`` of this instance.
    """

    def __init__(
        self,
        top_k: int = 10,
        with_scores: bool = False,
        stop_words: Optional[list[str]] = None,
        multiset_jaccard: bool = False,
    ):
        """
        Parameters
        ----------
        top_k : int
            Number of top results to return.
        with_scores : bool
            If True, return (payload, score) pairs. Otherwise, only payloads.
        stop_words : list[str], optional
            Tokens to drop during tokenization; compared case-insensitively.
        multiset_jaccard : bool
            If True, use multiset (frequency aware) Jaccard instead of set Jaccard.
        """
        # Stop words are lowercased once so they match the lowercased tokens.
        if stop_words:
            self.stop_words = {word.lower() for word in stop_words}
        else:
            self.stop_words = set()
        self.multiset_jaccard = multiset_jaccard

        # These attributes are set first so the bound methods handed to the
        # base class can already rely on them.
        super().__init__(
            tokenizer=self._tokenizer,
            scorer=self._scorer,
            top_k=top_k,
            with_scores=with_scores,
        )

    def _tokenizer(self, text: str) -> list[str]:
        """
        Lowercase ``text``, split it into word-character runs (``\\w+``) and
        drop every token that is a configured stop word.
        """
        kept = []
        for token in re.findall(r"\w+", text.lower()):
            if token in self.stop_words:
                continue
            kept.append(token)
        return kept

    def _scorer(self, query_tokens: list[str], chunk_tokens: list[str]) -> float:
        """
        Jaccard similarity between the query tokens and the chunk tokens.

        - With ``multiset_jaccard`` enabled: sum of per-token minimum counts
          divided by the sum of per-token maximum counts.
        - Otherwise: size of the set intersection divided by size of the union.

        Returns 0.0 whenever the ratio would be undefined (empty input).
        """
        if not self.multiset_jaccard:
            query_set = set(query_tokens)
            chunk_set = set(chunk_tokens)
            if not (query_set and chunk_set):
                return 0.0
            return len(query_set & chunk_set) / len(query_set | chunk_set)

        query_counts = Counter(query_tokens)
        chunk_counts = Counter(chunk_tokens)
        # Counter "&" keeps the per-token minimum and "|" the per-token maximum.
        overlap = sum((query_counts & chunk_counts).values())
        total = sum((query_counts | chunk_counts).values())
        if not total:
            return 0.0
        return overlap / total
|
||||
117
cognee/modules/retrieval/lexical_retriever.py
Normal file
117
cognee/modules/retrieval/lexical_retriever.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
import asyncio
|
||||
from typing import Any, Callable, Optional
|
||||
from heapq import nlargest
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.modules.retrieval.base_retriever import BaseRetriever
|
||||
from cognee.modules.retrieval.exceptions.exceptions import NoDataError
|
||||
from cognee.shared.logging_utils import get_logger
|
||||
|
||||
|
||||
logger = get_logger("LexicalRetriever")
|
||||
|
||||
|
||||
class LexicalRetriever(BaseRetriever):
    """
    Retriever that ranks DocumentChunk payloads with a pluggable lexical scorer.

    All DocumentChunk nodes are loaded from the graph engine on first use,
    tokenized with ``tokenizer`` and kept in memory. A query is tokenized the
    same way and compared to every cached chunk with ``scorer``.
    """

    def __init__(
        self, tokenizer: Callable, scorer: Callable, top_k: int = 10, with_scores: bool = False
    ):
        """
        Args:
            tokenizer: Callable that turns a text into tokens.
            scorer: Callable taking (query_tokens, chunk_tokens) and returning a number.
            top_k: Maximum number of results returned by ``get_context``.
            with_scores: If truthy, results are (payload, score) pairs.

        Raises:
            TypeError: If ``tokenizer`` or ``scorer`` is not callable.
            ValueError: If ``top_k`` is not a positive integer.
        """
        if not callable(tokenizer) or not callable(scorer):
            raise TypeError("tokenizer and scorer must be callables")
        if not isinstance(top_k, int) or top_k <= 0:
            raise ValueError("top_k must be a positive integer")

        self.tokenizer = tokenizer
        self.scorer = scorer
        self.top_k = top_k
        self.with_scores = bool(with_scores)

        # In-memory caches, both keyed by chunk id (as a string)
        self.chunks: dict[str, Any] = {}  # {chunk_id: tokens}
        self.payloads: dict[str, Any] = {}  # {chunk_id: original_document}
        self._initialized = False
        # Serializes initialize() so concurrent callers load the chunks only once
        self._init_lock = asyncio.Lock()

    async def initialize(self):
        """
        Initialize retriever by reading all DocumentChunks from graph_engine.

        Nodes that cannot be unpacked into (chunk_id, document), or whose
        document does not support ``.get``, are skipped with a warning. Chunks
        that yield no tokens or whose tokenization fails are skipped too.

        Raises:
            NoDataError: If the graph engine cannot be queried or no chunk
                could be loaded.
        """
        async with self._init_lock:
            # Another coroutine may have finished the work while we waited
            if self._initialized:
                return

            logger.info("Initializing LexicalRetriever by loading DocumentChunks from graph engine")

            try:
                graph_engine = await get_graph_engine()
                nodes, _ = await graph_engine.get_filtered_graph_data([{"type": ["DocumentChunk"]}])
            except Exception as e:
                logger.error("Graph engine initialization failed")
                raise NoDataError("Graph engine initialization failed") from e

            chunk_count = 0
            for node in nodes:
                try:
                    chunk_id, document = node
                    # Read the payload inside the guard: a 2-tuple whose second
                    # item is not a mapping must be skipped, not abort loading.
                    node_type = document.get("type")
                    text = document.get("text")
                except Exception:
                    logger.warning("Skipping node with unexpected shape: %r", node)
                    continue

                if node_type != "DocumentChunk" or not text:
                    continue

                try:
                    tokens = self.tokenizer(text)
                    if not tokens:
                        continue
                    # Prefer the id stored in the payload, fall back to the node id
                    cache_key = str(document.get("id", chunk_id))
                    self.chunks[cache_key] = tokens
                    self.payloads[cache_key] = document
                    chunk_count += 1
                except Exception as e:
                    logger.error("Tokenizer failed for chunk %s: %s", chunk_id, str(e))

            if chunk_count == 0:
                logger.error("Initialization completed but no valid chunks were loaded.")
                raise NoDataError("No valid chunks loaded during initialization.")

            self._initialized = True
            logger.info("Initialized with %d document chunks", len(self.chunks))

    async def get_context(self, query: str) -> Any:
        """
        Retrieves relevant chunks for the given query.

        Args:
            query: Text to search for.

        Returns:
            Up to ``top_k`` chunk payloads ordered by descending score, or
            (payload, score) pairs when ``with_scores`` is set. An empty list is
            returned when there are no cached chunks, the query cannot be
            tokenized, or it yields no tokens.
        """
        if not self._initialized:
            await self.initialize()

        if not self.chunks:
            logger.warning("No chunks available in retriever")
            return []

        try:
            query_tokens = self.tokenizer(query)
        except Exception as e:
            logger.error("Failed to tokenize query: %s", str(e))
            return []

        if not query_tokens:
            logger.warning("Query produced no tokens")
            return []

        results = []
        for chunk_id, chunk_tokens in self.chunks.items():
            try:
                score = self.scorer(query_tokens, chunk_tokens)
                if not isinstance(score, (int, float)):
                    logger.warning("Non-numeric score for chunk %s → treated as 0.0", chunk_id)
                    score = 0.0
            except Exception as e:
                # A failing scorer only zeroes this chunk; the search continues
                logger.error("Scorer failed for chunk %s: %s", chunk_id, str(e))
                score = 0.0
            results.append((chunk_id, score))

        top_results = nlargest(self.top_k, results, key=lambda x: x[1])
        logger.info(
            "Retrieved %d/%d chunks for query (len=%d)",
            len(top_results),
            len(results),
            len(query_tokens),
        )

        if self.with_scores:
            return [(self.payloads[chunk_id], score) for chunk_id, score in top_results]
        else:
            return [self.payloads[chunk_id] for chunk_id, _ in top_results]

    async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
        """
        Returns context for the given query (retrieves if not provided).

        Args:
            query: Text to search for.
            context: Previously retrieved context; returned unchanged when given.
        """
        if context is None:
            context = await self.get_context(query)
        return context
|
||||
|
|
@ -15,6 +15,7 @@ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
|
|||
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
||||
from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
|
||||
from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever
|
||||
from cognee.modules.retrieval.jaccard_retrival import JaccardChunksRetriever
|
||||
from cognee.modules.retrieval.graph_summary_completion_retriever import (
|
||||
GraphSummaryCompletionRetriever,
|
||||
)
|
||||
|
|
@ -152,6 +153,10 @@ async def get_search_type_tools(
|
|||
TemporalRetriever(top_k=top_k).get_completion,
|
||||
TemporalRetriever(top_k=top_k).get_context,
|
||||
],
|
||||
SearchType.CHUNKS_LEXICAL: (lambda _r=JaccardChunksRetriever(top_k=top_k): [
|
||||
_r.get_completion,
|
||||
_r.get_context,
|
||||
])(),
|
||||
SearchType.CODING_RULES: [
|
||||
CodingRulesRetriever(rules_nodeset_name=node_name).get_existing_rules,
|
||||
],
|
||||
|
|
|
|||
|
|
@ -17,3 +17,4 @@ class SearchType(Enum):
|
|||
FEEDBACK = "FEEDBACK"
|
||||
TEMPORAL = "TEMPORAL"
|
||||
CODING_RULES = "CODING_RULES"
|
||||
CHUNKS_LEXICAL = "CHUNKS_LEXICAL"
|
||||
|
|
|
|||
|
|
@ -9,6 +9,18 @@ from uuid import UUID
|
|||
async def authorized_give_permission_on_datasets(
|
||||
principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
|
||||
):
|
||||
"""
|
||||
Give permission to certain datasets to a user.
|
||||
The request owner must have the necessary permission to share the datasets.
|
||||
Args:
|
||||
principal_id: Id of user to whom datasets are shared
|
||||
dataset_ids: Ids of datasets to share
|
||||
permission_name: Name of permission to give
|
||||
owner_id: Id of the request owner
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
# If only a single dataset UUID is provided transform it to a list
|
||||
if not isinstance(dataset_ids, list):
|
||||
dataset_ids = [dataset_ids]
|
||||
|
|
|
|||
|
|
@ -10,6 +10,17 @@ logger = get_logger()
|
|||
|
||||
|
||||
async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
|
||||
"""
|
||||
Check if a user has a specific permission on a dataset.
|
||||
Args:
|
||||
user: User whose permission is checked
|
||||
permission_type: Type of permission to check
|
||||
dataset_id: Id of the dataset
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
"""
|
||||
if user is None:
|
||||
user = await get_default_user()
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,16 @@ logger = get_logger()
|
|||
|
||||
|
||||
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
|
||||
"""
|
||||
Return a list of datasets the user has permission for.
|
||||
If the user is part of a tenant, return datasets his roles have permission for.
|
||||
Args:
|
||||
user
|
||||
permission_type
|
||||
|
||||
Returns:
|
||||
list[Dataset]: List of datasets user has permission for
|
||||
"""
|
||||
datasets = list()
|
||||
# Get all datasets User has explicit access to
|
||||
datasets.extend(await get_principal_datasets(user, permission_type))
|
||||
|
|
|
|||
|
|
@ -8,6 +8,16 @@ from ...models import ACL, Permission
|
|||
|
||||
|
||||
async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -> list[str]:
|
||||
"""
|
||||
Return a list of documents ids for which the user has read permission.
|
||||
If datasets are specified, return only documents from those datasets.
|
||||
Args:
|
||||
user_id: Id of the user
|
||||
datasets: List of datasets
|
||||
|
||||
Returns:
|
||||
list[str]: List of documents for which the user has read permission
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
|
|
|
|||
|
|
@ -6,6 +6,15 @@ from ...models.Principal import Principal
|
|||
|
||||
|
||||
async def get_principal(principal_id: UUID):
|
||||
"""
|
||||
Return information about a user based on their id
|
||||
Args:
|
||||
principal_id: Id of the user
|
||||
|
||||
Returns:
|
||||
principal: Information about the user (principal)
|
||||
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
|
|
|
|||
|
|
@ -9,6 +9,17 @@ from ...models.ACL import ACL
|
|||
|
||||
|
||||
async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
|
||||
"""
|
||||
Return a list of datasets for which the user (principal) has a certain permission.
|
||||
Args:
|
||||
principal: Information about the user
|
||||
permission_type: Type of permission
|
||||
|
||||
Returns:
|
||||
list[Dataset]: List of datasets for which the user (principal)
|
||||
has the permission (permission_type).
|
||||
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
|
|
|
|||
|
|
@ -9,6 +9,16 @@ from ...models.Role import Role
|
|||
|
||||
|
||||
async def get_role(tenant_id: UUID, role_name: str):
|
||||
"""
|
||||
Return the role with the name role_name of the given tenant.
|
||||
Args:
|
||||
tenant_id: Id of the given tenant
|
||||
role_name: Name of the role
|
||||
|
||||
Returns:
|
||||
The role for the given tenant.
|
||||
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
|
|
|
|||
|
|
@ -15,9 +15,9 @@ async def get_specific_user_permission_datasets(
|
|||
Return a list of datasets user has given permission for. If a list of datasets is provided,
|
||||
verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
|
||||
Args:
|
||||
user_id:
|
||||
permission_type:
|
||||
dataset_ids:
|
||||
user_id: Id of the user.
|
||||
permission_type: Type of the permission.
|
||||
dataset_ids: Ids of the provided datasets
|
||||
|
||||
Returns:
|
||||
list[Dataset]: List of datasets user has permission for
|
||||
|
|
|
|||
|
|
@ -8,6 +8,15 @@ from ...models.Tenant import Tenant
|
|||
|
||||
|
||||
async def get_tenant(tenant_id: UUID):
|
||||
"""
|
||||
Return information about the tenant based on the given id.
|
||||
Args:
|
||||
tenant_id: Id of the given tenant
|
||||
|
||||
Returns:
|
||||
Information about the given tenant.
|
||||
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
|
|
|
|||
|
|
@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|||
|
||||
|
||||
async def give_default_permission_to_role(role_id: UUID, permission_name: str):
|
||||
"""
|
||||
Give the permission with given name to the role with the given id as a default permission.
|
||||
Args:
|
||||
role_id: Id of the role
|
||||
permission_name: Name of the permission
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
|
|
|
|||
|
|
@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|||
|
||||
|
||||
async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
|
||||
"""
|
||||
Give the permission with given name to the tenant with the given id as a default permission.
|
||||
Args:
|
||||
tenant_id: Id of the tenant
|
||||
permission_name: Name of the permission
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
tenant = (
|
||||
|
|
|
|||
|
|
@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|||
|
||||
|
||||
async def give_default_permission_to_user(user_id: UUID, permission_name: str):
|
||||
"""
|
||||
Give the permission with given name to the user with the given id as a default permission.
|
||||
Args:
|
||||
user_id: Id of the user
|
||||
permission_name: Name of the permission
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
|
||||
|
|
|
|||
|
|
@ -24,6 +24,16 @@ async def give_permission_on_dataset(
|
|||
dataset_id: UUID,
|
||||
permission_name: str,
|
||||
):
|
||||
"""
|
||||
Give a specific permission on a dataset to a user.
|
||||
Args:
|
||||
principal: User who is being given the permission on the dataset
|
||||
dataset_id: Id of the dataset
|
||||
permission_name: Name of permission to give
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
|
||||
async with db_engine.get_async_session() as session:
|
||||
|
|
|
|||
|
|
@ -21,6 +21,17 @@ from cognee.modules.users.models import (
|
|||
|
||||
|
||||
async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
|
||||
"""
|
||||
Add a user with the given id to the role with the given id.
|
||||
Args:
|
||||
user_id: Id of the user.
|
||||
role_id: Id of the role.
|
||||
owner_id: Id of the request owner.
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
|
||||
|
|
|
|||
|
|
@ -16,6 +16,16 @@ async def create_role(
|
|||
role_name: str,
|
||||
owner_id: UUID,
|
||||
):
|
||||
"""
|
||||
Create a new role with the given name, if the request owner with the given id
|
||||
has the necessary permission.
|
||||
Args:
|
||||
role_name: Name of the new role.
|
||||
owner_id: Id of the request owner.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
user = await get_user(owner_id)
|
||||
|
|
|
|||
|
|
@ -13,6 +13,18 @@ from cognee.modules.users.exceptions import (
|
|||
|
||||
|
||||
async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
|
||||
"""
|
||||
Add a user with the given id to the tenant with the given id.
|
||||
This can only be successful if the request owner with the given id is the tenant owner.
|
||||
Args:
|
||||
user_id: Id of the user.
|
||||
tenant_id: Id of the tenant.
|
||||
owner_id: Id of the request owner.
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
user = await get_user(user_id)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,16 @@ from cognee.modules.users.methods import get_user
|
|||
|
||||
|
||||
async def create_tenant(tenant_name: str, user_id: UUID):
|
||||
"""
|
||||
Create a new tenant with the given name, for the user with the given id.
|
||||
This user is the owner of the tenant.
|
||||
Args:
|
||||
tenant_name: Name of the new tenant.
|
||||
user_id: Id of the user.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
db_engine = get_relational_engine()
|
||||
async with db_engine.get_async_session() as session:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -3,8 +3,14 @@ from typing import Type, List, Optional
|
|||
from pydantic import BaseModel
|
||||
|
||||
from cognee.infrastructure.databases.graph import get_graph_engine
|
||||
from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
|
||||
from cognee.tasks.storage.add_data_points import add_data_points
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.ontology_config import Config
|
||||
from cognee.modules.ontology.get_default_ontology_resolver import (
|
||||
get_default_ontology_resolver,
|
||||
get_ontology_resolver_from_env,
|
||||
)
|
||||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||
from cognee.modules.graph.utils import (
|
||||
expand_with_nodes_and_edges,
|
||||
|
|
@ -24,9 +30,28 @@ async def integrate_chunk_graphs(
|
|||
data_chunks: list[DocumentChunk],
|
||||
chunk_graphs: list,
|
||||
graph_model: Type[BaseModel],
|
||||
ontology_adapter: OntologyResolver,
|
||||
ontology_resolver: BaseOntologyResolver,
|
||||
) -> List[DocumentChunk]:
|
||||
"""Updates DocumentChunk objects, integrates data points and edges into databases."""
|
||||
"""Integrate chunk graphs with ontology validation and store in databases.
|
||||
|
||||
This function processes document chunks and their associated knowledge graphs,
|
||||
validates entities against an ontology resolver, and stores the integrated
|
||||
data points and edges in the configured databases.
|
||||
|
||||
Args:
|
||||
data_chunks: List of document chunks containing source data
|
||||
chunk_graphs: List of knowledge graphs corresponding to each chunk
|
||||
graph_model: Pydantic model class for graph data validation
|
||||
ontology_resolver: Resolver for validating entities against ontology
|
||||
|
||||
Returns:
|
||||
List of updated DocumentChunk objects with integrated data
|
||||
|
||||
Raises:
|
||||
InvalidChunkGraphInputError: If input validation fails
|
||||
InvalidGraphModelError: If graph model validation fails
|
||||
InvalidOntologyAdapterError: If ontology resolver validation fails
|
||||
"""
|
||||
|
||||
if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
|
||||
raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
|
||||
|
|
@ -36,9 +61,9 @@ async def integrate_chunk_graphs(
|
|||
)
|
||||
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
|
||||
raise InvalidGraphModelError(graph_model)
|
||||
if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
|
||||
if ontology_resolver is None or not hasattr(ontology_resolver, "get_subgraph"):
|
||||
raise InvalidOntologyAdapterError(
|
||||
type(ontology_adapter).__name__ if ontology_adapter else "None"
|
||||
type(ontology_resolver).__name__ if ontology_resolver else "None"
|
||||
)
|
||||
|
||||
graph_engine = await get_graph_engine()
|
||||
|
|
@ -55,7 +80,7 @@ async def integrate_chunk_graphs(
|
|||
)
|
||||
|
||||
graph_nodes, graph_edges = expand_with_nodes_and_edges(
|
||||
data_chunks, chunk_graphs, ontology_adapter, existing_edges_map
|
||||
data_chunks, chunk_graphs, ontology_resolver, existing_edges_map
|
||||
)
|
||||
|
||||
if len(graph_nodes) > 0:
|
||||
|
|
@ -70,7 +95,7 @@ async def integrate_chunk_graphs(
|
|||
async def extract_graph_from_data(
|
||||
data_chunks: List[DocumentChunk],
|
||||
graph_model: Type[BaseModel],
|
||||
ontology_adapter: OntologyResolver = None,
|
||||
config: Config = None,
|
||||
custom_prompt: Optional[str] = None,
|
||||
) -> List[DocumentChunk]:
|
||||
"""
|
||||
|
|
@ -101,6 +126,24 @@ async def extract_graph_from_data(
|
|||
if edge.source_node_id in valid_node_ids and edge.target_node_id in valid_node_ids
|
||||
]
|
||||
|
||||
return await integrate_chunk_graphs(
|
||||
data_chunks, chunk_graphs, graph_model, ontology_adapter or OntologyResolver()
|
||||
)
|
||||
# Extract resolver from config if provided, otherwise get default
|
||||
if config is None:
|
||||
ontology_config = get_ontology_env_config()
|
||||
if (
|
||||
ontology_config.ontology_file_path
|
||||
and ontology_config.ontology_resolver
|
||||
and ontology_config.matching_strategy
|
||||
):
|
||||
config: Config = {
|
||||
"ontology_config": {
|
||||
"ontology_resolver": get_ontology_resolver_from_env(**ontology_config.to_dict())
|
||||
}
|
||||
}
|
||||
else:
|
||||
config: Config = {
|
||||
"ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
|
||||
}
|
||||
|
||||
ontology_resolver = config["ontology_config"]["ontology_resolver"]
|
||||
|
||||
return await integrate_chunk_graphs(data_chunks, chunk_graphs, graph_model, ontology_resolver)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from typing import List
|
|||
|
||||
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
||||
from cognee.shared.data_models import KnowledgeGraph
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
|
||||
from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
|
||||
from cognee.tasks.graph.cascade_extract.utils.extract_nodes import extract_nodes
|
||||
from cognee.tasks.graph.cascade_extract.utils.extract_content_nodes_and_relationship_names import (
|
||||
extract_content_nodes_and_relationship_names,
|
||||
|
|
@ -17,9 +17,21 @@ from cognee.tasks.graph.extract_graph_from_data import integrate_chunk_graphs
|
|||
async def extract_graph_from_data(
|
||||
data_chunks: List[DocumentChunk],
|
||||
n_rounds: int = 2,
|
||||
ontology_adapter: OntologyResolver = None,
|
||||
ontology_adapter: BaseOntologyResolver = None,
|
||||
) -> List[DocumentChunk]:
|
||||
"""Extract and update graph data from document chunks in multiple steps."""
|
||||
"""Extract and update graph data from document chunks using cascade extraction.
|
||||
|
||||
This function performs multi-step graph extraction from document chunks,
|
||||
using cascade extraction techniques to build comprehensive knowledge graphs.
|
||||
|
||||
Args:
|
||||
data_chunks: List of document chunks to process
|
||||
n_rounds: Number of extraction rounds to perform (default: 2)
|
||||
ontology_adapter: Resolver for validating entities against ontology
|
||||
|
||||
Returns:
|
||||
List of updated DocumentChunk objects with extracted graph data
|
||||
"""
|
||||
chunk_nodes = await asyncio.gather(
|
||||
*[extract_nodes(chunk.text, n_rounds) for chunk in data_chunks]
|
||||
)
|
||||
|
|
@ -44,5 +56,5 @@ async def extract_graph_from_data(
|
|||
data_chunks=data_chunks,
|
||||
chunk_graphs=chunk_graphs,
|
||||
graph_model=KnowledgeGraph,
|
||||
ontology_adapter=ontology_adapter or OntologyResolver(),
|
||||
ontology_adapter=ontology_adapter,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,12 +1,14 @@
|
|||
import pytest
|
||||
from rdflib import Graph, Namespace, RDF, OWL, RDFS
|
||||
from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver, AttachedOntologyNode
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.models import AttachedOntologyNode
|
||||
from cognee.modules.ontology.get_default_ontology_resolver import get_default_ontology_resolver
|
||||
|
||||
|
||||
def test_ontology_adapter_initialization_success():
|
||||
"""Test successful initialization of OntologyAdapter."""
|
||||
"""Test successful initialization of RDFLibOntologyResolver from get_default_ontology_resolver."""
|
||||
|
||||
adapter = OntologyResolver()
|
||||
adapter = get_default_ontology_resolver()
|
||||
adapter.build_lookup()
|
||||
|
||||
assert isinstance(adapter.lookup, dict)
|
||||
|
|
@ -14,7 +16,7 @@ def test_ontology_adapter_initialization_success():
|
|||
|
||||
def test_ontology_adapter_initialization_file_not_found():
|
||||
"""Test OntologyAdapter initialization with nonexistent file."""
|
||||
adapter = OntologyResolver(ontology_file="nonexistent.owl")
|
||||
adapter = RDFLibOntologyResolver(ontology_file="nonexistent.owl")
|
||||
assert adapter.graph is None
|
||||
|
||||
|
||||
|
|
@ -27,7 +29,7 @@ def test_build_lookup():
|
|||
|
||||
g.add((ns.Audi, RDF.type, ns.Car))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -50,7 +52,7 @@ def test_find_closest_match_exact():
|
|||
g.add((ns.Car, RDF.type, OWL.Class))
|
||||
g.add((ns.Audi, RDF.type, ns.Car))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -71,7 +73,7 @@ def test_find_closest_match_fuzzy():
|
|||
g.add((ns.Audi, RDF.type, ns.Car))
|
||||
g.add((ns.BMW, RDF.type, ns.Car))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -92,7 +94,7 @@ def test_find_closest_match_no_match():
|
|||
g.add((ns.Audi, RDF.type, ns.Car))
|
||||
g.add((ns.BMW, RDF.type, ns.Car))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -102,10 +104,10 @@ def test_find_closest_match_no_match():
|
|||
|
||||
|
||||
def test_get_subgraph_no_match_rdflib():
|
||||
"""Test get_subgraph returns empty results for a non-existent node."""
|
||||
"""Test get_subgraph returns empty results for a non-existent node using RDFLibOntologyResolver."""
|
||||
g = Graph()
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = get_default_ontology_resolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -138,7 +140,7 @@ def test_get_subgraph_success_rdflib():
|
|||
g.add((ns.VW, owns, ns.Audi))
|
||||
g.add((ns.VW, owns, ns.Porsche))
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = RDFLibOntologyResolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -160,10 +162,10 @@ def test_get_subgraph_success_rdflib():
|
|||
|
||||
|
||||
def test_refresh_lookup_rdflib():
|
||||
"""Test that refresh_lookup rebuilds the lookup dict into a new object."""
|
||||
"""Test that refresh_lookup rebuilds the lookup dict into a new object using RDFLibOntologyResolver."""
|
||||
g = Graph()
|
||||
|
||||
resolver = OntologyResolver()
|
||||
resolver = get_default_ontology_resolver()
|
||||
resolver.graph = g
|
||||
resolver.build_lookup()
|
||||
|
||||
|
|
@ -172,3 +174,318 @@ def test_refresh_lookup_rdflib():
|
|||
resolver.refresh_lookup()
|
||||
|
||||
assert resolver.lookup is not original_lookup
|
||||
|
||||
|
||||
def test_fuzzy_matching_strategy_exact_match():
    """A name that is literally present among the candidates is returned as-is."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    known_names = ["audi", "bmw", "mercedes"]
    matcher = FuzzyMatchingStrategy()

    assert matcher.find_match("audi", known_names) == "audi"
|
||||
|
||||
|
||||
def test_fuzzy_matching_strategy_fuzzy_match():
    """A misspelled name resolves to the closest candidate with cutoff=0.6."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    known_names = ["audi", "bmw", "mercedes"]
    matcher = FuzzyMatchingStrategy(cutoff=0.6)

    # "audii" carries one extra trailing character compared with "audi".
    matched = matcher.find_match("audii", known_names)
    assert matched == "audi"
|
||||
|
||||
|
||||
def test_fuzzy_matching_strategy_no_match():
    """With cutoff=0.9 an unrelated name yields None instead of a candidate."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    known_names = ["audi", "bmw", "mercedes"]
    strict_matcher = FuzzyMatchingStrategy(cutoff=0.9)

    matched = strict_matcher.find_match("completely_different", known_names)
    assert matched is None
|
||||
|
||||
|
||||
def test_fuzzy_matching_strategy_empty_candidates():
    """Matching against an empty candidate list returns None."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    matcher = FuzzyMatchingStrategy()

    assert matcher.find_match("audi", []) is None
|
||||
|
||||
|
||||
def test_base_ontology_resolver_initialization():
    """A BaseOntologyResolver subclass built without arguments gets a FuzzyMatchingStrategy."""
    from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    class _MinimalResolver(BaseOntologyResolver):
        """Smallest concrete subclass: every overridden method is a no-op stub."""

        def build_lookup(self):
            return None

        def refresh_lookup(self):
            return None

        def find_closest_match(self, name, category):
            return None

        def get_subgraph(self, node_name, node_type="individuals", directed=True):
            return [], [], None

    instance = _MinimalResolver()

    assert isinstance(instance.matching_strategy, FuzzyMatchingStrategy)
|
||||
|
||||
|
||||
def test_base_ontology_resolver_custom_matching_strategy():
    """Test that BaseOntologyResolver keeps the matching strategy passed to its constructor.

    The check is an identity check: the resolver must expose the very same
    strategy object it was given, not merely one that compares equal.
    """
    from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
    from cognee.modules.ontology.matching_strategies import MatchingStrategy

    class CustomMatchingStrategy(MatchingStrategy):
        """Stub strategy that answers every query with a fixed string."""

        def find_match(self, name, candidates):
            return "custom_match"

    class TestOntologyResolver(BaseOntologyResolver):
        """Minimal concrete resolver; all overridden methods are inert stubs."""

        def build_lookup(self):
            pass

        def refresh_lookup(self):
            pass

        def find_closest_match(self, name, category):
            return None

        def get_subgraph(self, node_name, node_type="individuals", directed=True):
            return [], [], None

    custom_strategy = CustomMatchingStrategy()
    resolver = TestOntologyResolver(matching_strategy=custom_strategy)

    # `is` rather than `==`: equality could pass for a different-but-equal object.
    assert resolver.matching_strategy is custom_strategy
|
||||
|
||||
|
||||
def test_ontology_config_structure():
    """Test the nested dict layout used for ontology configuration.

    Builds a ``Config`` mapping of the form
    ``{"ontology_config": {"ontology_resolver": <resolver>}}`` and checks that
    the resolver stored under those keys is the same object that was put in.
    """
    from cognee.modules.ontology.ontology_config import Config
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    matching_strategy = FuzzyMatchingStrategy()
    resolver = RDFLibOntologyResolver(matching_strategy=matching_strategy)

    config: Config = {"ontology_config": {"ontology_resolver": resolver}}

    # Identity, not equality: the config must carry the exact resolver instance.
    assert config["ontology_config"]["ontology_resolver"] is resolver
|
||||
|
||||
|
||||
def test_get_ontology_resolver_default():
    """get_default_ontology_resolver yields an RDFLibOntologyResolver using FuzzyMatchingStrategy."""
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    default_resolver = get_default_ontology_resolver()

    # Check the resolver type first, then the strategy it carries.
    assert isinstance(default_resolver, RDFLibOntologyResolver)
    assert isinstance(default_resolver.matching_strategy, FuzzyMatchingStrategy)
|
||||
|
||||
|
||||
def test_get_default_ontology_resolver():
    """The default resolver is RDFLib-based and is configured with fuzzy matching."""
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    produced = get_default_ontology_resolver()
    assert isinstance(produced, RDFLibOntologyResolver)

    strategy = produced.matching_strategy
    assert isinstance(strategy, FuzzyMatchingStrategy)
|
||||
|
||||
|
||||
def test_rdflib_ontology_resolver_uses_matching_strategy():
    """RDFLibOntologyResolver.find_closest_match returns what the injected strategy returns."""
    from cognee.modules.ontology.matching_strategies import MatchingStrategy

    class _FixedAnswerStrategy(MatchingStrategy):
        """Stub: answers "test_match" whenever it is handed any candidates."""

        def find_match(self, name, candidates):
            if candidates:
                return "test_match"
            return None

    vocab = Namespace("http://example.org/test#")
    ontology_graph = Graph()
    # One OWL class (Car) and one individual of that class (Audi).
    for triple in (
        (vocab.Car, RDF.type, OWL.Class),
        (vocab.Audi, RDF.type, vocab.Car),
    ):
        ontology_graph.add(triple)

    resolver = RDFLibOntologyResolver(matching_strategy=_FixedAnswerStrategy())
    resolver.graph = ontology_graph
    resolver.build_lookup()

    assert resolver.find_closest_match("Audi", "individuals") == "test_match"
|
||||
|
||||
|
||||
def test_rdflib_ontology_resolver_default_matching_strategy():
    """An RDFLibOntologyResolver built with no arguments carries a FuzzyMatchingStrategy."""
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    strategy = RDFLibOntologyResolver().matching_strategy

    assert isinstance(strategy, FuzzyMatchingStrategy)
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_success():
    """Valid "rdflib" / "fuzzy" / path arguments produce a matching resolver."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    settings = {
        "ontology_resolver": "rdflib",
        "matching_strategy": "fuzzy",
        "ontology_file_path": "/test/path.owl",
    }
    built = get_ontology_resolver_from_env(**settings)

    assert isinstance(built, RDFLibOntologyResolver)
    assert isinstance(built.matching_strategy, FuzzyMatchingStrategy)
    # The path given to the factory is exposed unchanged on the resolver.
    assert built.ontology_file == "/test/path.owl"
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_unsupported_resolver():
    """An unknown resolver name raises EnvironmentError naming it and the supported setup."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    bad_settings = {
        "ontology_resolver": "unsupported",
        "matching_strategy": "fuzzy",
        "ontology_file_path": "/test/path.owl",
    }
    with pytest.raises(EnvironmentError) as exc_info:
        get_ontology_resolver_from_env(**bad_settings)

    message = str(exc_info.value)
    assert "Unsupported ontology resolver: unsupported" in message
    assert "Supported resolvers are: RdfLib with FuzzyMatchingStrategy" in message
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_unsupported_strategy():
    """An unknown matching strategy raises EnvironmentError.

    Note that the asserted message text names the resolver ("rdflib"), not the
    rejected strategy.
    """
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    bad_settings = {
        "ontology_resolver": "rdflib",
        "matching_strategy": "unsupported",
        "ontology_file_path": "/test/path.owl",
    }
    with pytest.raises(EnvironmentError) as exc_info:
        get_ontology_resolver_from_env(**bad_settings)

    message = str(exc_info.value)
    assert "Unsupported ontology resolver: rdflib" in message
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_empty_file_path():
    """An empty-string ontology file path raises EnvironmentError."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    bad_settings = {
        "ontology_resolver": "rdflib",
        "matching_strategy": "fuzzy",
        "ontology_file_path": "",
    }
    with pytest.raises(EnvironmentError) as exc_info:
        get_ontology_resolver_from_env(**bad_settings)

    message = str(exc_info.value)
    assert "Unsupported ontology resolver: rdflib" in message
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_none_file_path():
    """Passing None as the ontology file path raises EnvironmentError."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    bad_settings = {
        "ontology_resolver": "rdflib",
        "matching_strategy": "fuzzy",
        "ontology_file_path": None,
    }
    with pytest.raises(EnvironmentError) as exc_info:
        get_ontology_resolver_from_env(**bad_settings)

    message = str(exc_info.value)
    assert "Unsupported ontology resolver: rdflib" in message
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_empty_resolver():
    """An empty-string resolver name raises EnvironmentError."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    bad_settings = {
        "ontology_resolver": "",
        "matching_strategy": "fuzzy",
        "ontology_file_path": "/test/path.owl",
    }
    with pytest.raises(EnvironmentError) as exc_info:
        get_ontology_resolver_from_env(**bad_settings)

    # Only the message prefix is checked; the resolver name is empty here.
    message = str(exc_info.value)
    assert "Unsupported ontology resolver:" in message
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_empty_strategy():
    """An empty-string matching strategy raises EnvironmentError."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    bad_settings = {
        "ontology_resolver": "rdflib",
        "matching_strategy": "",
        "ontology_file_path": "/test/path.owl",
    }
    with pytest.raises(EnvironmentError) as exc_info:
        get_ontology_resolver_from_env(**bad_settings)

    message = str(exc_info.value)
    assert "Unsupported ontology resolver: rdflib" in message
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_default_parameters():
    """Calling get_ontology_resolver_from_env with no arguments raises EnvironmentError."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    with pytest.raises(EnvironmentError) as exc_info:
        get_ontology_resolver_from_env()

    message = str(exc_info.value)
    assert "Unsupported ontology resolver:" in message
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_case_sensitivity():
    """Resolver names are matched case-sensitively: "RDFLIB" and "RdfLib" are rejected."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    # Same call for each wrongly-cased spelling, in the original order.
    for wrong_case_name in ("RDFLIB", "RdfLib"):
        with pytest.raises(EnvironmentError):
            get_ontology_resolver_from_env(
                ontology_resolver=wrong_case_name,
                matching_strategy="fuzzy",
                ontology_file_path="/test/path.owl",
            )
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_with_actual_file():
    """Test get_ontology_resolver_from_env with an ontology file that exists on disk.

    A minimal RDF/XML document is written to a temporary directory and its path
    is handed to the factory, so this test covers an existing file rather than
    repeating the made-up-path case of the ``_success`` test.
    """
    import os
    import tempfile

    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    # Well-formed RDF/XML with no triples; just enough to be a real ontology file.
    # NOTE(review): assumes the resolver loads ".owl" files as RDF/XML - confirm.
    empty_ontology = (
        '<?xml version="1.0"?>\n'
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n'
        '         xmlns:owl="http://www.w3.org/2002/07/owl#">\n'
        "</rdf:RDF>\n"
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        ontology_path = os.path.join(tmp_dir, "ontology.owl")
        with open(ontology_path, "w", encoding="utf-8") as handle:
            handle.write(empty_ontology)

        resolver = get_ontology_resolver_from_env(
            ontology_resolver="rdflib",
            matching_strategy="fuzzy",
            ontology_file_path=ontology_path,
        )

        assert isinstance(resolver, RDFLibOntologyResolver)
        assert isinstance(resolver.matching_strategy, FuzzyMatchingStrategy)
        assert resolver.ontology_file == ontology_path
|
||||
|
||||
|
||||
def test_get_ontology_resolver_from_env_resolver_functionality():
    """A resolver obtained from the env factory supports lookup, matching and subgraph calls."""
    from cognee.modules.ontology.get_default_ontology_resolver import get_ontology_resolver_from_env

    settings = {
        "ontology_resolver": "rdflib",
        "matching_strategy": "fuzzy",
        "ontology_file_path": "/test/path.owl",
    }
    built = get_ontology_resolver_from_env(**settings)

    built.build_lookup()
    assert isinstance(built.lookup, dict)

    # An entity that is not in the ontology has no closest match.
    closest = built.find_closest_match("test", "individuals")
    assert closest is None

    # ...and its subgraph is empty with no start node.
    found_nodes, found_edges, origin = built.get_subgraph("test", "individuals")
    assert found_nodes == []
    assert found_edges == []
    assert origin is None
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import cognee
|
|||
from cognee.api.v1.search import SearchType
|
||||
from cognee.api.v1.visualize.visualize import visualize_graph
|
||||
from cognee.shared.logging_utils import setup_logging
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.ontology_config import Config
|
||||
|
||||
text_1 = """
|
||||
1. Audi
|
||||
|
|
@ -60,7 +62,14 @@ async def main():
|
|||
os.path.dirname(os.path.abspath(__file__)), "ontology_input_example/basic_ontology.owl"
|
||||
)
|
||||
|
||||
await cognee.cognify(ontology_file_path=ontology_path)
|
||||
# Create full config structure manually
|
||||
config: Config = {
|
||||
"ontology_config": {
|
||||
"ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
|
||||
}
|
||||
}
|
||||
|
||||
await cognee.cognify(config=config)
|
||||
print("Knowledge with ontology created.")
|
||||
|
||||
# Step 4: Query insights
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import os
|
|||
import textwrap
|
||||
from cognee.api.v1.search import SearchType
|
||||
from cognee.api.v1.visualize.visualize import visualize_graph
|
||||
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
|
||||
from cognee.modules.ontology.ontology_config import Config
|
||||
|
||||
|
||||
async def run_pipeline(ontology_path=None):
|
||||
|
|
@ -17,7 +19,13 @@ async def run_pipeline(ontology_path=None):
|
|||
|
||||
await cognee.add(scientific_papers_dir)
|
||||
|
||||
pipeline_run = await cognee.cognify(ontology_file_path=ontology_path)
|
||||
config: Config = {
|
||||
"ontology_config": {
|
||||
"ontology_resolver": RDFLibOntologyResolver(ontology_file=ontology_path)
|
||||
}
|
||||
}
|
||||
|
||||
pipeline_run = await cognee.cognify(config=config)
|
||||
|
||||
return pipeline_run
|
||||
|
||||
|
|
|
|||
986
notebooks/ontology_demo.ipynb
vendored
986
notebooks/ontology_demo.ipynb
vendored
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,7 @@
|
|||
[project]
|
||||
name = "cognee"
|
||||
|
||||
version = "0.3.4.dev4"
|
||||
version = "0.3.4"
|
||||
description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
|
||||
authors = [
|
||||
{ name = "Vasilije Markovic" },
|
||||
|
|
@ -64,14 +64,13 @@ dependencies = [
|
|||
"pylance>=0.22.0,<1.0.0",
|
||||
"kuzu (==0.11.0)",
|
||||
"python-magic-bin<0.5 ; platform_system == 'Windows'", # Only needed for Windows
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
api = [
|
||||
"uvicorn>=0.34.0,<1.0.0",
|
||||
"gunicorn>=20.1.0,<24",
|
||||
"websockets>=15.0.1,<16.0.0"
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
api=[]
|
||||
distributed = [
|
||||
"modal>=1.0.5,<2.0.0",
|
||||
]
|
||||
|
|
|
|||
30
uv.lock
generated
30
uv.lock
generated
|
|
@ -852,7 +852,7 @@ wheels = [
|
|||
|
||||
[[package]]
|
||||
name = "cognee"
|
||||
version = "0.3.4.dev4"
|
||||
version = "0.3.4"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "aiofiles" },
|
||||
|
|
@ -864,6 +864,7 @@ dependencies = [
|
|||
{ name = "fastapi" },
|
||||
{ name = "fastapi-users", extra = ["sqlalchemy"] },
|
||||
{ name = "filetype" },
|
||||
{ name = "gunicorn" },
|
||||
{ name = "instructor" },
|
||||
{ name = "jinja2" },
|
||||
{ name = "kuzu" },
|
||||
|
|
@ -899,17 +900,14 @@ dependencies = [
|
|||
{ name = "tiktoken" },
|
||||
{ name = "typing-extensions" },
|
||||
{ name = "unstructured", extra = ["pdf"] },
|
||||
{ name = "uvicorn" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
anthropic = [
|
||||
{ name = "anthropic" },
|
||||
]
|
||||
api = [
|
||||
{ name = "gunicorn" },
|
||||
{ name = "uvicorn" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
aws = [
|
||||
{ name = "s3fs", extra = ["boto3"] },
|
||||
]
|
||||
|
|
@ -1036,7 +1034,7 @@ requires-dist = [
|
|||
{ name = "google-generativeai", marker = "extra == 'gemini'", specifier = ">=0.8.4,<0.9" },
|
||||
{ name = "graphiti-core", marker = "extra == 'graphiti'", specifier = ">=0.7.0,<0.8" },
|
||||
{ name = "groq", marker = "extra == 'groq'", specifier = ">=0.8.0,<1.0.0" },
|
||||
{ name = "gunicorn", marker = "extra == 'api'", specifier = ">=20.1.0,<24" },
|
||||
{ name = "gunicorn", specifier = ">=20.1.0,<24" },
|
||||
{ name = "instructor", specifier = ">=1.9.1,<2.0.0" },
|
||||
{ name = "jinja2", specifier = ">=3.1.3,<4" },
|
||||
{ name = "kuzu", specifier = "==0.11.0" },
|
||||
|
|
@ -1105,8 +1103,8 @@ requires-dist = [
|
|||
{ name = "typing-extensions", specifier = ">=4.12.2,<5.0.0" },
|
||||
{ name = "unstructured", extras = ["csv", "doc", "docx", "epub", "md", "odt", "org", "ppt", "pptx", "rst", "rtf", "tsv", "xlsx"], marker = "extra == 'docs'", specifier = ">=0.18.1,<19" },
|
||||
{ name = "unstructured", extras = ["pdf"], specifier = ">=0.18.1,<19" },
|
||||
{ name = "uvicorn", marker = "extra == 'api'", specifier = ">=0.34.0,<1.0.0" },
|
||||
{ name = "websockets", marker = "extra == 'api'", specifier = ">=15.0.1,<16.0.0" },
|
||||
{ name = "uvicorn", specifier = ">=0.34.0,<1.0.0" },
|
||||
{ name = "websockets", specifier = ">=15.0.1,<16.0.0" },
|
||||
]
|
||||
provides-extras = ["api", "distributed", "neo4j", "neptune", "postgres", "postgres-binary", "notebook", "langchain", "llama-index", "gemini", "huggingface", "ollama", "mistral", "anthropic", "deepeval", "posthog", "falkordb", "groq", "chromadb", "docs", "codegraph", "evals", "gui", "graphiti", "aws", "dev", "debug"]
|
||||
|
||||
|
|
@ -4706,7 +4704,7 @@ name = "nvidia-cudnn-cu12"
|
|||
version = "9.10.2.21"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
|
||||
|
|
@ -4717,7 +4715,7 @@ name = "nvidia-cufft-cu12"
|
|||
version = "11.3.3.83"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
|
||||
|
|
@ -4744,9 +4742,9 @@ name = "nvidia-cusolver-cu12"
|
|||
version = "11.7.3.90"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-cublas-cu12", marker = "sys_platform != 'emscripten'" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "sys_platform != 'emscripten'" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
|
||||
|
|
@ -4757,7 +4755,7 @@ name = "nvidia-cusparse-cu12"
|
|||
version = "12.5.8.93"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "sys_platform != 'emscripten'" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
|
||||
|
|
@ -8204,7 +8202,7 @@ name = "triton"
|
|||
version = "3.4.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "setuptools", marker = "sys_platform != 'emscripten'" },
|
||||
{ name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" },
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue