Merge branch 'dev' into fix-mcp-migrations

This commit is contained in:
Igor Ilic 2025-12-15 16:45:57 +01:00 committed by GitHub
commit 882abaf83c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 307 additions and 115 deletions

View file

@ -84,3 +84,93 @@ jobs:
EMBEDDING_DIMENSIONS: "3072"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py
test-bedrock-api-key:
name: Run Bedrock API Key Test
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
extra-dependencies: "aws"
- name: Run Bedrock API Key Simple Example
env:
LLM_PROVIDER: "bedrock"
LLM_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
LLM_MODEL: "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
LLM_MAX_TOKENS: "16384"
AWS_REGION_NAME: "eu-west-1"
EMBEDDING_PROVIDER: "bedrock"
EMBEDDING_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0"
EMBEDDING_DIMENSIONS: "1024"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py
test-bedrock-aws-credentials:
name: Run Bedrock AWS Credentials Test
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
extra-dependencies: "aws"
- name: Run Bedrock AWS Credentials Simple Example
env:
LLM_PROVIDER: "bedrock"
LLM_MODEL: "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
LLM_MAX_TOKENS: "16384"
AWS_REGION_NAME: "eu-west-1"
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
EMBEDDING_PROVIDER: "bedrock"
EMBEDDING_API_KEY: ${{ secrets.BEDROCK_API_KEY }}
EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0"
EMBEDDING_DIMENSIONS: "1024"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py
test-bedrock-aws-profile:
name: Run Bedrock AWS Profile Test
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
extra-dependencies: "aws"
- name: Configure AWS Profile
run: |
mkdir -p ~/.aws
cat > ~/.aws/credentials << EOF
[bedrock-test]
aws_access_key_id = ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key = ${{ secrets.AWS_SECRET_ACCESS_KEY }}
EOF
- name: Run Bedrock AWS Profile Simple Example
env:
LLM_PROVIDER: "bedrock"
LLM_MODEL: "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
LLM_MAX_TOKENS: "16384"
AWS_PROFILE_NAME: "bedrock-test"
AWS_REGION_NAME: "eu-west-1"
EMBEDDING_PROVIDER: "bedrock"
EMBEDDING_MODEL: "amazon.titan-embed-text-v2:0"
EMBEDDING_DIMENSIONS: "1024"
EMBEDDING_MAX_TOKENS: "8191"
run: uv run python ./examples/python/simple_example.py

View file

@ -3,7 +3,7 @@
Test client for Cognee MCP Server functionality.
This script tests all the tools and functions available in the Cognee MCP server,
including cognify, codify, search, prune, status checks, and utility functions.
including cognify, search, prune, status checks, and utility functions.
Usage:
# Set your OpenAI API key first
@ -23,6 +23,7 @@ import tempfile
import time
from contextlib import asynccontextmanager
from cognee.shared.logging_utils import setup_logging
from logging import ERROR, INFO
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
@ -35,7 +36,7 @@ from src.server import (
load_class,
)
# Set timeout for cognify/codify to complete in
# Set timeout for cognify to complete in
TIMEOUT = 5 * 60 # 5 min in seconds
@ -151,12 +152,9 @@ DEBUG = True
expected_tools = {
"cognify",
"codify",
"search",
"prune",
"cognify_status",
"codify_status",
"cognee_add_developer_rules",
"list_data",
"delete",
}
@ -247,106 +245,6 @@ DEBUG = True
}
print(f"{test_name} test failed: {e}")
async def test_codify(self):
"""Test the codify functionality using MCP client."""
print("\n🧪 Testing codify functionality...")
try:
async with self.mcp_server_session() as session:
codify_result = await session.call_tool(
"codify", arguments={"repo_path": self.test_repo_dir}
)
start = time.time() # mark the start
while True:
try:
# Wait a moment
await asyncio.sleep(5)
# Check if codify processing is finished
status_result = await session.call_tool("codify_status", arguments={})
if hasattr(status_result, "content") and status_result.content:
status_text = (
status_result.content[0].text
if status_result.content
else str(status_result)
)
else:
status_text = str(status_result)
if str(PipelineRunStatus.DATASET_PROCESSING_COMPLETED) in status_text:
break
elif time.time() - start > TIMEOUT:
raise TimeoutError("Codify did not complete in 5min")
except DatabaseNotCreatedError:
if time.time() - start > TIMEOUT:
raise TimeoutError("Database was not created in 5min")
self.test_results["codify"] = {
"status": "PASS",
"result": codify_result,
"message": "Codify executed successfully",
}
print("✅ Codify test passed")
except Exception as e:
self.test_results["codify"] = {
"status": "FAIL",
"error": str(e),
"message": "Codify test failed",
}
print(f"❌ Codify test failed: {e}")
async def test_cognee_add_developer_rules(self):
"""Test the cognee_add_developer_rules functionality using MCP client."""
print("\n🧪 Testing cognee_add_developer_rules functionality...")
try:
async with self.mcp_server_session() as session:
result = await session.call_tool(
"cognee_add_developer_rules", arguments={"base_path": self.test_data_dir}
)
start = time.time() # mark the start
while True:
try:
# Wait a moment
await asyncio.sleep(5)
# Check if developer rule cognify processing is finished
status_result = await session.call_tool("cognify_status", arguments={})
if hasattr(status_result, "content") and status_result.content:
status_text = (
status_result.content[0].text
if status_result.content
else str(status_result)
)
else:
status_text = str(status_result)
if str(PipelineRunStatus.DATASET_PROCESSING_COMPLETED) in status_text:
break
elif time.time() - start > TIMEOUT:
raise TimeoutError(
"Cognify of developer rules did not complete in 5min"
)
except DatabaseNotCreatedError:
if time.time() - start > TIMEOUT:
raise TimeoutError("Database was not created in 5min")
self.test_results["cognee_add_developer_rules"] = {
"status": "PASS",
"result": result,
"message": "Developer rules addition executed successfully",
}
print("✅ Developer rules test passed")
except Exception as e:
self.test_results["cognee_add_developer_rules"] = {
"status": "FAIL",
"error": str(e),
"message": "Developer rules test failed",
}
print(f"❌ Developer rules test failed: {e}")
async def test_search_functionality(self):
"""Test the search functionality with different search types using MCP client."""
print("\n🧪 Testing search functionality...")
@ -359,7 +257,11 @@ DEBUG = True
# Go through all Cognee search types
for search_type in SearchType:
# Don't test these search types
if search_type in [SearchType.NATURAL_LANGUAGE, SearchType.CYPHER]:
if search_type in [
SearchType.NATURAL_LANGUAGE,
SearchType.CYPHER,
SearchType.TRIPLET_COMPLETION,
]:
break
try:
async with self.mcp_server_session() as session:
@ -681,9 +583,6 @@ class TestModel:
test_name="Cognify2",
)
await self.test_codify()
await self.test_cognee_add_developer_rules()
# Test list_data and delete functionality
await self.test_list_data()
await self.test_delete()
@ -739,7 +638,5 @@ async def main():
if __name__ == "__main__":
from logging import ERROR
logger = setup_logging(log_level=ERROR)
asyncio.run(main())

View file

@ -155,7 +155,7 @@ async def add(
- LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)
Optional:
- LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
- LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral", "bedrock"
- LLM_MODEL: Model name (default: "gpt-5-mini")
- DEFAULT_USER_EMAIL: Custom default user email
- DEFAULT_USER_PASSWORD: Custom default user password

View file

@ -42,7 +42,9 @@ class CognifyPayloadDTO(InDTO):
default="", description="Custom prompt for entity extraction and graph generation"
)
ontology_key: Optional[List[str]] = Field(
default=None, description="Reference to one or more previously uploaded ontologies"
default=None,
examples=[[]],
description="Reference to one or more previously uploaded ontologies",
)

View file

@ -9,6 +9,8 @@ class S3Config(BaseSettings):
aws_access_key_id: Optional[str] = None
aws_secret_access_key: Optional[str] = None
aws_session_token: Optional[str] = None
aws_profile_name: Optional[str] = None
aws_bedrock_runtime_endpoint: Optional[str] = None
model_config = SettingsConfigDict(env_file=".env", extra="allow")

View file

@ -0,0 +1,5 @@
"""Bedrock LLM adapter module."""
from .adapter import BedrockAdapter
__all__ = ["BedrockAdapter"]

View file

@ -0,0 +1,153 @@
import litellm
import instructor
from typing import Type
from pydantic import BaseModel
from litellm.exceptions import ContentPolicyViolationError
from instructor.exceptions import InstructorRetryException
from cognee.infrastructure.llm.LLMGateway import LLMGateway
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
LLMInterface,
)
from cognee.infrastructure.llm.exceptions import (
ContentPolicyFilterError,
MissingSystemPromptPathError,
)
from cognee.infrastructure.files.storage.s3_config import get_s3_config
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
rate_limit_async,
rate_limit_sync,
sleep_and_retry_async,
sleep_and_retry_sync,
)
from cognee.modules.observability.get_observe import get_observe
observe = get_observe()
class BedrockAdapter(LLMInterface):
"""
Adapter for AWS Bedrock API with support for three authentication methods:
1. API Key (Bearer Token)
2. AWS Credentials (access key + secret key)
3. AWS Profile (boto3 credential chain)
"""
name = "Bedrock"
model: str
api_key: str
default_instructor_mode = "json_schema_mode"
MAX_RETRIES = 5
def __init__(
self,
model: str,
api_key: str = None,
max_completion_tokens: int = 16384,
streaming: bool = False,
instructor_mode: str = None,
):
self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
self.aclient = instructor.from_litellm(
litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
)
self.client = instructor.from_litellm(litellm.completion)
self.model = model
self.api_key = api_key
self.max_completion_tokens = max_completion_tokens
self.streaming = streaming
def _create_bedrock_request(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> dict:
"""Create Bedrock request with authentication."""
request_params = {
"model": self.model,
"custom_llm_provider": "bedrock",
"drop_params": True,
"messages": [
{"role": "user", "content": text_input},
{"role": "system", "content": system_prompt},
],
"response_model": response_model,
"max_retries": self.MAX_RETRIES,
"max_completion_tokens": self.max_completion_tokens,
"stream": self.streaming,
}
s3_config = get_s3_config()
# Add authentication parameters
if self.api_key:
request_params["api_key"] = self.api_key
elif s3_config.aws_access_key_id and s3_config.aws_secret_access_key:
request_params["aws_access_key_id"] = s3_config.aws_access_key_id
request_params["aws_secret_access_key"] = s3_config.aws_secret_access_key
if s3_config.aws_session_token:
request_params["aws_session_token"] = s3_config.aws_session_token
elif s3_config.aws_profile_name:
request_params["aws_profile_name"] = s3_config.aws_profile_name
if s3_config.aws_region:
request_params["aws_region_name"] = s3_config.aws_region
# Add optional parameters
if s3_config.aws_bedrock_runtime_endpoint:
request_params["aws_bedrock_runtime_endpoint"] = s3_config.aws_bedrock_runtime_endpoint
return request_params
@observe(as_type="generation")
@sleep_and_retry_async()
@rate_limit_async
async def acreate_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
"""Generate structured output from AWS Bedrock API."""
try:
request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
return await self.aclient.chat.completions.create(**request_params)
except (
ContentPolicyViolationError,
InstructorRetryException,
) as error:
if (
isinstance(error, InstructorRetryException)
and "content management policy" not in str(error).lower()
):
raise error
raise ContentPolicyFilterError(
f"The provided input contains content that is not aligned with our content policy: {text_input}"
)
@observe
@sleep_and_retry_sync()
@rate_limit_sync
def create_structured_output(
self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
) -> BaseModel:
"""Generate structured output from AWS Bedrock API (synchronous)."""
request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
return self.client.chat.completions.create(**request_params)
def show_prompt(self, text_input: str, system_prompt: str) -> str:
"""Format and display the prompt for a user query."""
if not text_input:
text_input = "No user input provided."
if not system_prompt:
raise MissingSystemPromptPathError()
system_prompt = LLMGateway.read_query_prompt(system_prompt)
formatted_prompt = (
f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
if system_prompt
else None
)
return formatted_prompt

View file

@ -24,6 +24,7 @@ class LLMProvider(Enum):
- CUSTOM: Represents a custom provider option.
- GEMINI: Represents the Gemini provider.
- MISTRAL: Represents the Mistral AI provider.
- BEDROCK: Represents the AWS Bedrock provider.
"""
OPENAI = "openai"
@ -32,6 +33,7 @@ class LLMProvider(Enum):
CUSTOM = "custom"
GEMINI = "gemini"
MISTRAL = "mistral"
BEDROCK = "bedrock"
def get_llm_client(raise_api_key_error: bool = True):
@ -154,7 +156,7 @@ def get_llm_client(raise_api_key_error: bool = True):
)
elif provider == LLMProvider.MISTRAL:
if llm_config.llm_api_key is None:
if llm_config.llm_api_key is None and raise_api_key_error:
raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.mistral.adapter import (
@ -169,5 +171,21 @@ def get_llm_client(raise_api_key_error: bool = True):
instructor_mode=llm_config.llm_instructor_mode.lower(),
)
elif provider == LLMProvider.BEDROCK:
# if llm_config.llm_api_key is None and raise_api_key_error:
# raise LLMAPIKeyNotSetError()
from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.bedrock.adapter import (
BedrockAdapter,
)
return BedrockAdapter(
model=llm_config.llm_model,
api_key=llm_config.llm_api_key,
max_completion_tokens=max_completion_tokens,
streaming=llm_config.llm_streaming,
instructor_mode=llm_config.llm_instructor_mode.lower(),
)
else:
raise UnsupportedLLMProviderError(provider)

View file

@ -16,6 +16,7 @@ class ModelName(Enum):
anthropic = "anthropic"
gemini = "gemini"
mistral = "mistral"
bedrock = "bedrock"
class LLMConfig(BaseModel):
@ -77,6 +78,10 @@ def get_settings() -> SettingsDict:
"value": "mistral",
"label": "Mistral",
},
{
"value": "bedrock",
"label": "Bedrock",
},
]
return SettingsDict.model_validate(
@ -157,6 +162,20 @@ def get_settings() -> SettingsDict:
"label": "Mistral Large 2.1",
},
],
"bedrock": [
{
"value": "eu.anthropic.claude-sonnet-4-5-20250929-v1:0",
"label": "Claude 4.5 Sonnet",
},
{
"value": "eu.anthropic.claude-haiku-4-5-20251001-v1:0",
"label": "Claude 4.5 Haiku",
},
{
"value": "eu.amazon.nova-lite-v1:0",
"label": "Amazon Nova Lite",
},
],
},
},
vector_db={

View file

@ -1,5 +1,6 @@
from typing import AsyncGenerator, Dict, Any, List, Optional
from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
from cognee.modules.engine.utils import generate_node_id
from cognee.shared.logging_utils import get_logger
from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
from cognee.infrastructure.engine import DataPoint
@ -155,7 +156,12 @@ def _process_single_triplet(
embeddable_text = f"{start_node_text}-{relationship_text}-{end_node_text}".strip()
triplet_obj = Triplet(from_node_id=start_node_id, to_node_id=end_node_id, text=embeddable_text)
relationship_name = relationship.get("relationship_name", "")
triplet_id = generate_node_id(str(start_node_id) + str(relationship_name) + str(end_node_id))
triplet_obj = Triplet(
id=triplet_id, from_node_id=start_node_id, to_node_id=end_node_id, text=embeddable_text
)
return triplet_obj, None