Merge branch 'dev' into improve-dataset-delete

This commit is contained in:
Igor Ilic 2025-12-15 17:28:08 +01:00 committed by GitHub
commit 31a3410368
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 487 additions and 131 deletions

154
.github/workflows/release.yml vendored Normal file
View file

@ -0,0 +1,154 @@
name: release.yml
on:
workflow_dispatch:
inputs:
flavour:
required: true
default: dev
type: choice
options:
- dev
- main
description: Dev or Main release
test_mode:
required: true
type: boolean
description: Aka Dry Run. If true, it won't affect public indices or repositories
jobs:
release-github:
name: Create GitHub Release from ${{ inputs.flavour }}
outputs:
tag: ${{ steps.create_tag.outputs.tag }}
version: ${{ steps.create_tag.outputs.version }}
permissions:
contents: write
runs-on: ubuntu-latest
steps:
- name: Check out ${{ inputs.flavour }}
uses: actions/checkout@v4
with:
ref: ${{ inputs.flavour }}
- name: Install uv
uses: astral-sh/setup-uv@v7
- name: Create and push git tag
id: create_tag
env:
TEST_MODE: ${{ inputs.test_mode }}
run: |
VERSION="$(uv version --short)"
TAG="v${VERSION}"
echo "Tag to create: ${TAG}"
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
if [ "$TEST_MODE" = "false" ]; then
git tag "${TAG}"
git push origin "${TAG}"
else
echo "Test mode is enabled. Skipping tag creation and push."
fi
- name: Create GitHub Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ steps.create_tag.outputs.tag }}
generate_release_notes: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
release-pypi-package:
needs: release-github
name: Release PyPI Package from ${{ inputs.flavour }}
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- name: Check out ${{ inputs.flavour }}
uses: actions/checkout@v4
with:
ref: ${{ inputs.flavour }}
- name: Install uv
uses: astral-sh/setup-uv@v7
- name: Install Python
run: uv python install
- name: Install dependencies
run: uv sync --locked --all-extras
- name: Build distributions
run: uv build
- name: Publish ${{ inputs.flavour }} release to TestPyPI
if: ${{ inputs.test_mode }}
env:
UV_PUBLISH_TOKEN: ${{ secrets.TEST_PYPI_TOKEN }}
run: uv publish --publish-url https://test.pypi.org/legacy/
- name: Publish ${{ inputs.flavour }} release to PyPI
if: ${{ !inputs.test_mode }}
env:
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: uv publish
release-docker-image:
needs: release-github
name: Release Docker Image from ${{ inputs.flavour }}
permissions:
contents: read
runs-on: ubuntu-latest
steps:
- name: Check out ${{ inputs.flavour }}
uses: actions/checkout@v4
with:
ref: ${{ inputs.flavour }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push Dev Docker Image
if: ${{ inputs.flavour == 'dev' }}
uses: docker/build-push-action@v5
with:
context: .
platforms: linux/amd64,linux/arm64
push: ${{ !inputs.test_mode }}
tags: cognee/cognee:${{ needs.release-github.outputs.version }}
labels: |
version=${{ needs.release-github.outputs.version }}
flavour=${{ inputs.flavour }}
cache-from: type=registry,ref=cognee/cognee:buildcache
cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max
- name: Build and push Main Docker Image
if: ${{ inputs.flavour == 'main' }}
uses: docker/build-push-action@v5
with:
context: .
platforms: linux/amd64,linux/arm64
push: ${{ !inputs.test_mode }}
tags: |
cognee/cognee:${{ needs.release-github.outputs.version }}
cognee/cognee:latest
labels: |
version=${{ needs.release-github.outputs.version }}
flavour=${{ inputs.flavour }}
cache-from: type=registry,ref=cognee/cognee:buildcache
cache-to: type=registry,ref=cognee/cognee:buildcache,mode=max

View file

@ -84,3 +84,93 @@ jobs:
EMBEDDING_DIMENSIONS: "3072"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py
test-bedrock-api-key:
name: Run Bedrock API Key Test
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
extra-dependencies: "aws"
- name: Run Bedrock API Key Simple Example
env:
LLM_PROVIDER: "bedrock"
LLM_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
LLM_MODEL: "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
LLM_MAX_TOKENS: "16384"
AWS_REGION_NAME: "eu-west-1"
EMBEDDING_PROVIDER: "bedrock"
EMBEDDING_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0"
EMBEDDING_DIMENSIONS: "1024"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py
test-bedrock-aws-credentials:
name: Run Bedrock AWS Credentials Test
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
extra-dependencies: "aws"
- name: Run Bedrock AWS Credentials Simple Example
env:
LLM_PROVIDER: "bedrock"
LLM_MODEL: "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
LLM_MAX_TOKENS: "16384"
AWS_REGION_NAME: "eu-west-1"
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
EMBEDDING_PROVIDER: "bedrock"
EMBEDDING_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0"
EMBEDDING_DIMENSIONS: "1024"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py
test-bedrock-aws-profile:
name: Run Bedrock AWS Profile Test
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
extra-dependencies: "aws"
- name: Configure AWS Profile
run: |
mkdir -p ~/.aws
cat > ~/.aws/credentials << EOF
[bedrock-test]
aws_access_key_id = ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key = ${{ secrets.AWS_SECRET_ACCESS_KEY }}
EOF
- name: Run Bedrock AWS Profile Simple Example
env:
LLM_PROVIDER: "bedrock"
LLM_MODEL: "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
LLM_MAX_TOKENS: "16384"
AWS_PROFILE_NAME: "bedrock-test"
AWS_REGION_NAME: "eu-west-1"
EMBEDDING_PROVIDER: "bedrock"
EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0"
EMBEDDING_DIMENSIONS: "1024"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py

View file

@ -3,7 +3,7 @@
Test client for Cognee MCP Server functionality.
This script tests all the tools and functions available in the Cognee MCP server,
including cognify, codify, search, prune, status checks, and utility functions.
including cognify, search, prune, status checks, and utility functions.
Usage:
# Set your OpenAI API key first
@ -23,6 +23,7 @@ import tempfile
import time
from contextlib import asynccontextmanager
from cognee.shared.logging_utils import setup_logging
from logging import ERROR, INFO
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
@ -35,7 +36,7 @@ from src.server import (
load_class,
)
# Set timeout for cognify/codify to complete in
# Set timeout for cognify to complete in
TIMEOUT = 5 * 60 # 5 min in seconds
@ -151,12 +152,9 @@ DEBUG = True
expected_tools = {
"cognify",
"codify",
"search",
"prune",
"cognify_status",
"codify_status",
"cognee_add_developer_rules",
"list_data",
"delete",
}
@ -247,106 +245,6 @@ DEBUG = True
}
print(f"{test_name} test failed: {e}")
async def test_codify(self):
"""Test the codify functionality using MCP client."""
print("\n🧪 Testing codify functionality...")
try:
async with self.mcp_server_session() as session:
codify_result = await session.call_tool(
"codify", arguments={"repo_path": self.test_repo_dir}
)
start = time.time() # mark the start
while True:
try:
# Wait a moment
await asyncio.sleep(5)
# Check if codify processing is finished
status_result = await session.call_tool("codify_status", arguments={})
if hasattr(status_result, "content") and status_result.content:
status_text = (
status_result.content[0].text
if status_result.content
else str(status_result)
)
else:
status_text = str(status_result)
if str(PipelineRunStatus.DATASET_PROCESSING_COMPLETED) in status_text:
break
elif time.time() - start > TIMEOUT:
raise TimeoutError("Codify did not complete in 5min")
except DatabaseNotCreatedError:
if time.time() - start > TIMEOUT:
raise TimeoutError("Database was not created in 5min")
self.test_results["codify"] = {
"status": "PASS",
"result": codify_result,
"message": "Codify executed successfully",
}
print("✅ Codify test passed")
except Exception as e:
self.test_results["codify"] = {
"status": "FAIL",
"error": str(e),
"message": "Codify test failed",
}
print(f"❌ Codify test failed: {e}")
async def test_cognee_add_developer_rules(self):
"""Test the cognee_add_developer_rules functionality using MCP client."""
print("\n🧪 Testing cognee_add_developer_rules functionality...")
try:
async with self.mcp_server_session() as session:
result = await session.call_tool(
"cognee_add_developer_rules", arguments={"base_path": self.test_data_dir}
)
start = time.time() # mark the start
while True:
try:
# Wait a moment
await asyncio.sleep(5)
# Check if developer rule cognify processing is finished
status_result = await session.call_tool("cognify_status", arguments={})
if hasattr(status_result, "content") and status_result.content:
status_text = (
status_result.content[0].text
if status_result.content
else str(status_result)
)
else:
status_text = str(status_result)
if str(PipelineRunStatus.DATASET_PROCESSING_COMPLETED) in status_text:
break
elif time.time() - start > TIMEOUT:
raise TimeoutError(
"Cognify of developer rules did not complete in 5min"
)
except DatabaseNotCreatedError:
if time.time() - start > TIMEOUT:
raise TimeoutError("Database was not created in 5min")
self.test_results["cognee_add_developer_rules"] = {
"status": "PASS",
"result": result,
"message": "Developer rules addition executed successfully",
}
print("✅ Developer rules test passed")
except Exception as e:
self.test_results["cognee_add_developer_rules"] = {
"status": "FAIL",
"error": str(e),
"message": "Developer rules test failed",
}
print(f"❌ Developer rules test failed: {e}")
async def test_search_functionality(self):
"""Test the search functionality with different search types using MCP client."""
print("\n🧪 Testing search functionality...")
@ -359,7 +257,11 @@ DEBUG = True
# Go through all Cognee search types
for search_type in SearchType:
# Don't test these search types
if search_type in [SearchType.NATURAL_LANGUAGE, SearchType.CYPHER]:
if search_type in [
SearchType.NATURAL_LANGUAGE,
SearchType.CYPHER,
SearchType.TRIPLET_COMPLETION,
]:
break
try:
async with self.mcp_server_session() as session:
@ -681,9 +583,6 @@ class TestModel:
test_name="Cognify2",
)
await self.test_codify()
await self.test_cognee_add_developer_rules()
# Test list_data and delete functionality
await self.test_list_data()
await self.test_delete()
@ -739,7 +638,5 @@ async def main():
if __name__ == "__main__":
from logging import ERROR
logger = setup_logging(log_level=ERROR)
asyncio.run(main())

View file

@ -155,7 +155,7 @@ async def add(
- LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)
Optional:
- LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
- LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral", "bedrock"
- LLM_MODEL: Model name (default: "gpt-5-mini")
- DEFAULT_USER_EMAIL: Custom default user email
- DEFAULT_USER_PASSWORD: Custom default user password

View file

@ -53,6 +53,7 @@ async def cognify(
custom_prompt: Optional[str] = None,
temporal_cognify: bool = False,
data_per_batch: int = 20,
**kwargs
):
"""
Transform ingested data into a structured knowledge graph.
@ -223,6 +224,7 @@ async def cognify(
config=config,
custom_prompt=custom_prompt,
chunks_per_batch=chunks_per_batch,
**kwargs,
)
# By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@ -251,6 +253,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
config: Config = None,
custom_prompt: Optional[str] = None,
chunks_per_batch: int = 100,
**kwargs,
) -> list[Task]:
if config is None:
ontology_config = get_ontology_env_config()
@ -288,6 +291,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
config=config,
custom_prompt=custom_prompt,
task_config={"batch_size": chunks_per_batch},
**kwargs,
), # Generate knowledge graphs from the document chunks.
Task(
summarize_text,

View file

@ -42,7 +42,9 @@ class CognifyPayloadDTO(InDTO):
default="", description="Custom prompt for entity extraction and graph generation"
)
ontology_key: Optional[List[str]] = Field(
default=None, description="Reference to one or more previously uploaded ontologies"
default=None,
examples=[[]],
description="Reference to one or more previously uploaded ontologies",
)

View file

@ -9,6 +9,8 @@ class S3Config(BaseSettings):
aws_access_key_id: Optional[str] = None
aws_secret_access_key: Optional[str] = None
aws_session_token: Optional[str] = None
aws_profile_name: Optional[str] = None
aws_bedrock_runtime_endpoint: Optional[str] = None
model_config = SettingsConfigDict(env_file=".env", extra="allow")

View file

@ -11,7 +11,7 @@ class LLMGateway:
@staticmethod
def acreate_structured_output(
text_input: str, system_prompt: str, response_model: Type[BaseModel]
text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
) -> Coroutine:
llm_config = get_llm_config()
if llm_config.structured_output_framework.upper() == "BAML":
@ -31,7 +31,7 @@ class LLMGateway:
llm_client = get_llm_client()
return llm_client.acreate_structured_output(
text_input=text_input, system_prompt=system_prompt, response_model=response_model
text_input=text_input, system_prompt=system_prompt, response_model=response_model, **kwargs
)
@staticmethod

View file

@ -10,7 +10,7 @@ from cognee.infrastructure.llm.config import (
async def extract_content_graph(
content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None
content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None, **kwargs
):
if custom_prompt:
system_prompt = custom_prompt
@ -30,7 +30,7 @@ async def extract_content_graph(
system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
content_graph = await LLMGateway.acreate_structured_output(
content, system_prompt, response_model
content, system_prompt, response_model, **kwargs
)
return content_graph

View file

@ -52,7 +52,7 @@ class AnthropicAdapter(LLMInterface):
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
) -> BaseModel:
"""
Generate a response from a user query.

View file

@ -0,0 +1,5 @@
"""Bedrock LLM adapter module."""
from .adapter import BedrockAdapter
__all__ = ["BedrockAdapter"]

View file

@ -0,0 +1,153 @@
import litellm
import instructor
from typing import Type
from pydantic import BaseModel
from litellm.exceptions import ContentPolicyViolationError
from instructor.exceptions import InstructorRetryException
from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
from cognee.infrastructure.llm.exceptions import (
ContentPolicyFilterError,
MissingSystemPromptPathError,
)
from cognee.infrastructure.files.storage.s3_config import get_s3_config
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async,
rate_limit_sync,
sleep_and_retry_async,
sleep_and_retry_sync,
)
from cognee.modules.observability.get_observe import get_observe
observe = get_observe()
class BedrockAdapter(LLMInterface):
"""
Adapter for AWS Bedrock API with support for three authentication methods:
1. API Key (Bearer Token)
2. AWS Credentials (access key + secret key)
3. AWS Profile (boto3 credential chain)
"""
name = "Bedrock"
model: str
api_key: str
default_instructor_mode = "json_schema_mode"
MAX_RETRIES = 5
def __init__(
self,
model: str,
api_key: str = None,
max_completion_tokens: int = 16384,
streaming: bool = False,
instructor_mode: str = None,
):
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
self.aclient = instructor.from_litellm(
litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
)
self.client = instructor.from_litellm(litellm.completion)
self.model = model
self.api_key = api_key
self.max_completion_tokens = max_completion_tokens
self.streaming = streaming
def _create_bedrock_request(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> dict:
"""Create Bedrock request with authentication."""
request_params = {
"model": self.model,
"custom_llm_provider": "bedrock",
"drop_params": True,
"messages": [
{"role": "user", "content": text_input},
{"role": "system", "content": system_prompt},
],
"response_model": response_model,
"max_retries": self.MAX_RETRIES,
"max_completion_tokens": self.max_completion_tokens,
"stream": self.streaming,
}
s3_config = get_s3_config()
# Add authentication parameters
if self.api_key:
request_params["api_key"] = self.api_key
elif s3_config.aws_access_key_id and s3_config.aws_secret_access_key:
request_params["aws_access_key_id"] = s3_config.aws_access_key_id
request_params["aws_secret_access_key"] = s3_config.aws_secret_access_key
if s3_config.aws_session_token:
request_params["aws_session_token"] = s3_config.aws_session_token
elif s3_config.aws_profile_name:
request_params["aws_profile_name"] = s3_config.aws_profile_name
if s3_config.aws_region:
request_params["aws_region_name"] = s3_config.aws_region
# Add optional parameters
if s3_config.aws_bedrock_runtime_endpoint:
request_params["aws_bedrock_runtime_endpoint"] = s3_config.aws_bedrock_runtime_endpoint
return request_params
@observe(as_type="generation")
@sleep_and_retry_async()
@rate_limit_async
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
"""Generate structured output from AWS Bedrock API."""
try:
request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
return await self.aclient.chat.completions.create(**request_params)
except (
ContentPolicyViolationError,
InstructorRetryException,
) as error:
if (
isinstance(error, InstructorRetryException)
and "content management policy" not in str(error).lower()
):
raise error
raise ContentPolicyFilterError(
f"The provided input contains content that is not aligned with our content policy: {text_input}"
)
@observe
@sleep_and_retry_sync()
@rate_limit_sync
def create_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
"""Generate structured output from AWS Bedrock API (synchronous)."""
request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
return self.client.chat.completions.create(**request_params)
def show_prompt(self, text_input: str, system_prompt: str) -> str:
"""Format and display the prompt for a user query."""
if not text_input:
text_input = "No user input provided."
if not system_prompt:
raise MissingSystemPromptPathError()
system_prompt = LLMGateway.read_query_prompt(system_prompt)
formatted_prompt = (
f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
if system_prompt
else None
)
return formatted_prompt

View file

@ -80,7 +80,7 @@ class GeminiAdapter(LLMInterface):
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
) -> BaseModel:
"""
Generate a response from a user query.

View file

@ -80,7 +80,7 @@ class GenericAPIAdapter(LLMInterface):
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
) -> BaseModel:
"""
Generate a response from a user query.

View file

@ -24,6 +24,7 @@ class LLMProvider(Enum):
- CUSTOM: Represents a custom provider option.
- GEMINI: Represents the Gemini provider.
- MISTRAL: Represents the Mistral AI provider.
- BEDROCK: Represents the AWS Bedrock provider.
"""
OPENAI = "openai"
@ -32,6 +33,7 @@ class LLMProvider(Enum):
CUSTOM = "custom"
GEMINI = "gemini"
MISTRAL = "mistral"
BEDROCK = "bedrock"
def get_llm_client(raise_api_key_error: bool = True):
@ -154,7 +156,7 @@ def get_llm_client(raise_api_key_error: bool = True):
)
elif provider == LLMProvider.MISTRAL:
if llm_config.llm_api_key is None:
if llm_config.llm_api_key is None and raise_api_key_error:
raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.mistral.adapter import (
@ -169,5 +171,21 @@ def get_llm_client(raise_api_key_error: bool = True):
instructor_mode=llm_config.llm_instructor_mode.lower(),
)
elif provider == LLMProvider.BEDROCK:
# if llm_config.llm_api_key is None and raise_api_key_error:
# raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.bedrock.adapter import (
BedrockAdapter,
)
return BedrockAdapter(
model=llm_config.llm_model,
api_key=llm_config.llm_api_key,
max_completion_tokens=max_completion_tokens,
streaming=llm_config.llm_streaming,
instructor_mode=llm_config.llm_instructor_mode.lower(),
)
else:
raise UnsupportedLLMProviderError(provider)

View file

@ -69,7 +69,7 @@ class MistralAdapter(LLMInterface):
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
) -> BaseModel:
"""
Generate a response from the user query.

View file

@ -76,7 +76,7 @@ class OllamaAPIAdapter(LLMInterface):
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
) -> BaseModel:
"""
Generate a structured output from the LLM using the provided text and system prompt.
@ -123,7 +123,7 @@ class OllamaAPIAdapter(LLMInterface):
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def create_transcript(self, input_file: str) -> str:
async def create_transcript(self, input_file: str, **kwargs) -> str:
"""
Generate an audio transcript from a user query.
@ -162,7 +162,7 @@ class OllamaAPIAdapter(LLMInterface):
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def transcribe_image(self, input_file: str) -> str:
async def transcribe_image(self, input_file: str, **kwargs) -> str:
"""
Transcribe content from an image using base64 encoding.

View file

@ -112,7 +112,7 @@ class OpenAIAdapter(LLMInterface):
reraise=True,
)
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
) -> BaseModel:
"""
Generate a response from a user query.
@ -154,6 +154,7 @@ class OpenAIAdapter(LLMInterface):
api_version=self.api_version,
response_model=response_model,
max_retries=self.MAX_RETRIES,
**kwargs,
)
except (
ContentFilterFinishReasonError,
@ -180,6 +181,7 @@ class OpenAIAdapter(LLMInterface):
# api_base=self.fallback_endpoint,
response_model=response_model,
max_retries=self.MAX_RETRIES,
**kwargs,
)
except (
ContentFilterFinishReasonError,
@ -205,7 +207,7 @@ class OpenAIAdapter(LLMInterface):
reraise=True,
)
def create_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
) -> BaseModel:
"""
Generate a response from a user query.
@ -245,6 +247,7 @@ class OpenAIAdapter(LLMInterface):
api_version=self.api_version,
response_model=response_model,
max_retries=self.MAX_RETRIES,
**kwargs,
)
@retry(
@ -254,7 +257,7 @@ class OpenAIAdapter(LLMInterface):
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def create_transcript(self, input):
async def create_transcript(self, input, **kwargs):
"""
Generate an audio transcript from a user query.
@ -281,6 +284,7 @@ class OpenAIAdapter(LLMInterface):
api_base=self.endpoint,
api_version=self.api_version,
max_retries=self.MAX_RETRIES,
**kwargs,
)
return transcription
@ -292,7 +296,7 @@ class OpenAIAdapter(LLMInterface):
before_sleep=before_sleep_log(logger, logging.DEBUG),
reraise=True,
)
async def transcribe_image(self, input) -> BaseModel:
async def transcribe_image(self, input, **kwargs) -> BaseModel:
"""
Generate a transcription of an image from a user query.
@ -337,4 +341,5 @@ class OpenAIAdapter(LLMInterface):
api_version=self.api_version,
max_completion_tokens=300,
max_retries=self.MAX_RETRIES,
**kwargs,
)

View file

@ -16,6 +16,7 @@ class ModelName(Enum):
anthropic = "anthropic"
gemini = "gemini"
mistral = "mistral"
bedrock = "bedrock"
class LLMConfig(BaseModel):
@ -77,6 +78,10 @@ def get_settings() -> SettingsDict:
"value": "mistral",
"label": "Mistral",
},
{
"value": "bedrock",
"label": "Bedrock",
},
]
return SettingsDict.model_validate(
@ -157,6 +162,20 @@ def get_settings() -> SettingsDict:
"label": "Mistral Large 2.1",
},
],
"bedrock": [
{
"value": "eu.anthropic.claude-sonnet-4-5-20250929-v1:0",
"label": "Claude 4.5 Sonnet",
},
{
"value": "eu.anthropic.claude-haiku-4-5-20251001-v1:0",
"label": "Claude 4.5 Haiku",
},
{
"value": "eu.amazon.nova-lite-v1:0",
"label": "Amazon Nova Lite",
},
],
},
},
vector_db={

View file

@ -97,6 +97,7 @@ async def extract_graph_from_data(
graph_model: Type[BaseModel],
config: Config = None,
custom_prompt: Optional[str] = None,
**kwargs,
) -> List[DocumentChunk]:
"""
Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
@ -111,7 +112,7 @@ async def extract_graph_from_data(
chunk_graphs = await asyncio.gather(
*[
extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt, **kwargs)
for chunk in data_chunks
]
)

View file

@ -1,5 +1,6 @@
from typing import AsyncGenerator, Dict, Any, List, Optional
from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
from cognee.modules.engine.utils import generate_node_id
from cognee.shared.logging_utils import get_logger
from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
from cognee.infrastructure.engine import DataPoint
@ -155,7 +156,12 @@ def _process_single_triplet(
embeddable_text = f"{start_node_text}-{relationship_text}-{end_node_text}".strip()
triplet_obj = Triplet(from_node_id=start_node_id, to_node_id=end_node_id, text=embeddable_text)
relationship_name = relationship.get("relationship_name", "")
triplet_id = generate_node_id(str(start_node_id) + str(relationship_name) + str(end_node_id))
triplet_obj = Triplet(
id=triplet_id, from_node_id=start_node_id, to_node_id=end_node_id, text=embeddable_text
)
return triplet_obj, None