Add tests for responses api

2025-05-09 09:25:42 +02:00 · 2025-05-09 09:25:42 +02:00 · 97baec5bdb
commit 97baec5bdb
parent bacea87186
4 changed files with 674 additions and 0 deletions
--- a/.github/workflows/responses_api_tests.yml
+++ b/.github/workflows/responses_api_tests.yml
@ -0,0 +1,92 @@
+name: Reusable Responses API Tests
+
+on:
+  workflow_call:
+    inputs:
+      python-version:
+        required: false
+        type: string
+        default: '3.11.x'
+    secrets:
+      LLM_PROVIDER:
+        required: true
+      LLM_MODEL:
+        required: true
+      LLM_ENDPOINT:
+        required: true
+      LLM_API_KEY:
+        required: true
+      LLM_API_VERSION:
+        required: true
+      EMBEDDING_PROVIDER:
+        required: true
+      EMBEDDING_MODEL:
+        required: true
+      EMBEDDING_ENDPOINT:
+        required: true
+      EMBEDDING_API_KEY:
+        required: true
+      EMBEDDING_API_VERSION:
+        required: true
+
+env:
+  RUNTIME__LOG_LEVEL: ERROR
+  ENV: 'dev'
+
+jobs:
+  responses-api-tests:
+    name: Run Responses API Tests
+    runs-on: ubuntu-22.04
+    env:
+      LLM_PROVIDER: ${{ secrets.LLM_PROVIDER }}
+      LLM_MODEL: ${{ secrets.LLM_MODEL }}
+      LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+      LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+      LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+
+      EMBEDDING_PROVIDER: ${{ secrets.EMBEDDING_PROVIDER }}
+      EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+      EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+      EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+      EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: ${{ inputs.python-version }}
+
+      - name: Install httpx
+        run: poetry run pip install httpx
+
+      - name: Start Cognee API Server
+        run: |
+          # Start API server in the background and save the process ID so the
+          # cleanup step can stop it later
+          poetry run python run_cognee_api_server.py --env dev &
+          SERVER_PID=$!
+          echo "API_SERVER_PID=$SERVER_PID" >> "$GITHUB_ENV"
+
+          # Poll until the server answers HTTP instead of sleeping a fixed time.
+          # Without --fail, curl succeeds on any HTTP response (even an error
+          # status), which is enough to know the port is being served.
+          echo "Waiting for API server to start..."
+          for attempt in $(seq 1 30); do
+            if curl --silent --output /dev/null --max-time 2 http://localhost:8000/; then
+              echo "API server is up (attempt $attempt)"
+              exit 0
+            fi
+            # Fail fast when the server process has already exited
+            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
+              echo "API server process exited before becoming ready"
+              exit 1
+            fi
+            sleep 2
+          done
+          echo "API server did not become ready in time"
+          exit 1
+        
+      - name: Run Basic API Tests
+        run: |
+          echo "Running basic responses API tests..."
+          poetry run python test_cognee_responses_api.py
+        
+      - name: Run Comprehensive API Tests
+        run: |
+          echo "Running comprehensive responses API tests..."
+          poetry run python test_cognee_responses_api_comprehensive.py
+        
+      - name: Clean up API server
+        if: always()
+        run: |
+          # API_SERVER_PID was written to GITHUB_ENV by the start step, so it is
+          # available here as a regular shell variable; it is empty when that
+          # step never ran.
+          if [ -n "${API_SERVER_PID:-}" ]; then
+            echo "Shutting down API server (PID: $API_SERVER_PID)..."
+            kill "$API_SERVER_PID" || true
+          fi
--- a/run_cognee_api_server.py
+++ b/run_cognee_api_server.py
@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+"""
+Script to run the Cognee API server for testing.
+"""
+
+import os
+import sys
+import argparse
+from cognee.api.client import start_api_server
+
+def main():
+    """Run the Cognee API server with specified host and port."""
+    parser = argparse.ArgumentParser(description="Run the Cognee API server for testing.")
+    parser.add_argument(
+        "--host", 
+        default="0.0.0.0", 
+        help="Host to bind the server to (default: 0.0.0.0)"
+    )
+    parser.add_argument(
+        "--port", 
+        type=int, 
+        default=8000, 
+        help="Port to bind the server to (default: 8000)"
+    )
+    parser.add_argument(
+        "--env", 
+        choices=["prod", "dev", "local"], 
+        default="local",
+        help="Environment to run the server in (default: local)"
+    )
+    
+    args = parser.parse_args()
+    
+    # Set environment variable
+    os.environ["ENV"] = args.env
+    
+    print(f"Starting Cognee API server in {args.env} mode on {args.host}:{args.port}")
+    
+    try:
+        start_api_server(host=args.host, port=args.port)
+    except KeyboardInterrupt:
+        print("\nServer stopped by user")
+        sys.exit(0)
+    except Exception as e:
+        print(f"Error starting server: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main() 
--- a/test_cognee_responses_api.py
+++ b/test_cognee_responses_api.py
@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+"""
+Test script for Cognee's OpenAI-compatible Responses API
+"""
+
+import os
+import json
+import asyncio
+import httpx
+
+# Configuration
+API_BASE_URL = "http://localhost:8000"  # Change to your actual API URL
+API_ENDPOINT = "/api/v1/responses/"  # Added trailing slash to match the server's redirection
+AUTH_ENDPOINT = "/api/v1/auth/login"
+# JWT token generated from get_token.py (valid for 1 hour from generation)
+# Replace this with a new token if tests fail due to expiration
+JWT_TOKEN = os.getenv(
+    "COGNEE_JWT_TOKEN",
+    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiNjc2MzU1NGMtOTFiZC00MzJjLWFiYTgtZDQyY2Q3MmVkNjU5IiwidGVuYW50X2lkIjoiNDUyMzU0NGQtODJiZC00MzJjLWFjYTctZDQyY2Q3MmVkNjUxIiwicm9sZXMiOlsiYWRtaW4iXSwiZXhwIjoxNzQ2NzM1NTg3fQ.fZtYlhg-7S8ikCNsjmAnYYpv9FQYWaXWgbYnTFkdek0"
+)
+
+# Note: Direct function tests using the tools parameter aren't working due to 
+# issues with how the OpenAI client is processing the requests. However, we can test
+# the API by using prompts that should trigger specific functions.
+
+
+async def test_with_default_tools(token=None):
+    """Test using the default tools provided by the API"""
+    print("\n--- Testing the OpenAI-compatible Responses API ---")
+    
+    # Define payloads for different types of prompts that should trigger different functions
+    payloads = [
+        {
+            "name": "General API capabilities",
+            "payload": {
+                "model": "cognee-v1",
+                "input": "What can I do with this API?",
+                "tool_choice": "auto"
+            },
+            "expected_function": None  # We don't expect any function call for this
+        },
+        {
+            "name": "Search query",
+            "payload": {
+                "model": "cognee-v1",
+                "input": "What information do we have about Python's asyncio module?",
+                "tool_choice": "auto"
+            },
+            "expected_function": "search"  # We expect a search function call
+        },
+        {
+            "name": "Cognify request",
+            "payload": {
+                "model": "cognee-v1",
+                "input": "Please add this information to the knowledge graph: Python's asyncio module provides infrastructure for writing single-threaded concurrent code using coroutines.",
+                "tool_choice": "auto"
+            },
+            "expected_function": "cognify_text"  # We expect a cognify_text function call
+        }
+    ]
+    
+    test_results = {}
+    
+    for test_case in payloads:
+        print(f"\nTesting: {test_case['name']}")
+        
+        headers = {"Content-Type": "application/json"}
+        if token:
+            headers["Authorization"] = f"Bearer {token}"
+        
+        try:
+            async with httpx.AsyncClient(timeout=60.0) as client:  # Increased timeout
+                response = await client.post(
+                    f"{API_BASE_URL}{API_ENDPOINT}",
+                    json=test_case["payload"],
+                    headers=headers
+                )
+                
+                print(f"Status code: {response.status_code}")
+                if response.status_code == 200:
+                    result = response.json()
+                    print(json.dumps(result, indent=2))
+                    
+                    # Check for tool calls - handle both snake_case and camelCase property names
+                    tool_calls = result.get("tool_calls", result.get("toolCalls", []))
+                    
+                    if tool_calls:
+                        function_names = [tc["function"]["name"] for tc in tool_calls if "function" in tc and "name" in tc["function"]]
+                        
+                        expected_fn = test_case["expected_function"]
+                        if expected_fn is None:
+                            # No function expected
+                            if not function_names:
+                                test_pass = True
+                                print(f"✅ {test_case['name']} test passed: No tool calls as expected")
+                            else:
+                                test_pass = False
+                                print(f"❌ {test_case['name']} test failed: Expected no function calls, but got {function_names}")
+                        else:
+                            # Specific function expected
+                            if expected_fn in function_names:
+                                test_pass = True
+                                print(f"✅ {test_case['name']} test passed: Expected function '{expected_fn}' was called")
+                                
+                                # If this is a cognify_text function, check for success status
+                                if expected_fn == "cognify_text":
+                                    for tc in tool_calls:
+                                        if tc.get("function", {}).get("name") == "cognify_text":
+                                            output = tc.get("output", {})
+                                            if output.get("status") == "success":
+                                                print(f"✅ cognify_text operation was successful")
+                                            else:
+                                                print(f"❌ cognify_text operation failed: {output}")
+                                
+                                # If this is a search function, check if we got results
+                                if expected_fn == "search":
+                                    for tc in tool_calls:
+                                        if tc.get("function", {}).get("name") == "search":
+                                            output = tc.get("output", {})
+                                            results = output.get("data", {}).get("result", [])
+                                            if results:
+                                                print(f"✅ search operation returned {len(results)} results")
+                                            else:
+                                                print(f"⚠️ search operation did not return any results")
+                            else:
+                                test_pass = False
+                                print(f"❌ {test_case['name']} test failed: Expected function '{expected_fn}' was not called. Got {function_names}")
+                    else:
+                        # No tool_calls in result
+                        if test_case["expected_function"] is None:
+                            test_pass = True
+                            print(f"✅ {test_case['name']} test passed: No tool calls as expected")
+                        else:
+                            test_pass = False
+                            print(f"❌ {test_case['name']} test failed: Expected function '{test_case['expected_function']}' but no tool calls were made")
+                else:
+                    test_pass = False
+                    print(f"❌ Request failed: {response.text}")
+        except Exception as e:
+            test_pass = False
+            print(f"❌ Exception during test: {str(e)}")
+        
+        test_results[test_case["name"]] = test_pass
+    
+    # Print summary
+    print("\n=== TEST RESULTS SUMMARY ===")
+    passed = sum(1 for result in test_results.values() if result)
+    total = len(test_results)
+    
+    for test_name, result in test_results.items():
+        status = "✅ PASSED" if result else "❌ FAILED"
+        print(f"{test_name}: {status}")
+    
+    print(f"\nPassed {passed}/{total} tests ({passed/total*100:.0f}%)")
+    
+    return passed == total
+
+
+async def main():
+    """Run all tests"""
+    print("Starting Cognee Responses API Tests")
+    
+    # Use the JWT token for authentication
+    token = JWT_TOKEN
+    print(f"Using JWT token: {token[:20]}...")
+    
+    # Run tests with the token
+    success = await test_with_default_tools(token)
+    
+    print("\nAll tests completed")
+    
+    # Return proper exit code for CI/CD pipelines
+    return 0 if success else 1
+
+
+if __name__ == "__main__":
+    exit_code = asyncio.run(main())
+    # Use exit code to signal test success/failure
+    import sys
+    sys.exit(exit_code) 
--- a/test_cognee_responses_api_comprehensive.py
+++ b/test_cognee_responses_api_comprehensive.py
@ -0,0 +1,353 @@
+#!/usr/bin/env python3
+"""
+Comprehensive test script for Cognee's OpenAI-compatible Responses API
+"""
+
+import os
+import json
+import asyncio
+import httpx
+from typing import Dict, Any, Optional, List
+import sys
+
+# Configuration
+API_BASE_URL = os.getenv("COGNEE_API_URL", "http://localhost:8000")
+API_ENDPOINT = "/api/v1/responses/"
+AUTH_ENDPOINT = "/api/v1/auth/login"
+EMAIL = os.getenv("COGNEE_EMAIL", "default_user@example.com")  # Default test user
+PASSWORD = os.getenv("COGNEE_PASSWORD", "default_password")  # Default test password
+
+# Constants
+GREEN = "\033[92m"
+RED = "\033[91m"
+YELLOW = "\033[93m"
+RESET = "\033[0m"
+
+
+def log_success(message: str) -> None:
+    """Print a success message in green"""
+    print(f"{GREEN}✅ {message}{RESET}")
+
+
+def log_error(message: str) -> None:
+    """Print an error message in red"""
+    print(f"{RED}❌ {message}{RESET}")
+
+
+def log_warning(message: str) -> None:
+    """Print a warning message in yellow"""
+    print(f"{YELLOW}⚠️ {message}{RESET}")
+
+
+def log_info(message: str) -> None:
+    """Print an info message"""
+    print(f"ℹ️ {message}")
+
+
+async def authenticate() -> Optional[str]:
+    """Authenticate with the API and return access token"""
+    log_info("Authenticating with the API...")
+    
+    auth_payload = {
+        "email": EMAIL,
+        "password": PASSWORD
+    }
+    
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{API_BASE_URL}{AUTH_ENDPOINT}",
+                json=auth_payload,
+                headers={"Content-Type": "application/json"}
+            )
+            
+            if response.status_code == 200:
+                auth_data = response.json()
+                token = auth_data.get("access_token")
+                if token:
+                    log_success(f"Authentication successful")
+                    return token
+                else:
+                    log_error("Authentication response did not contain access token")
+                    return None
+            else:
+                log_error(f"Authentication failed with status {response.status_code}: {response.text}")
+                return None
+    except Exception as e:
+        log_error(f"Authentication error: {str(e)}")
+        return None
+
+
+async def make_api_request(
+    payload: Dict[str, Any], 
+    token: Optional[str] = None, 
+    expected_status: int = 200
+) -> Dict[str, Any]:
+    """Make a request to the API and return the response"""
+    headers = {"Content-Type": "application/json"}
+    
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+    
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                f"{API_BASE_URL}{API_ENDPOINT}",
+                json=payload,
+                headers=headers,
+                timeout=60.0  # Increased timeout for cognify operations
+            )
+            
+            log_info(f"Response status: {response.status_code}")
+            
+            if response.status_code == expected_status:
+                if expected_status == 200:
+                    result = response.json()
+                    return result
+                else:
+                    return {"status": response.status_code, "text": response.text}
+            else:
+                log_error(f"Request failed with status {response.status_code}: {response.text}")
+                return {"error": response.text, "status_code": response.status_code}
+    except Exception as e:
+        log_error(f"Request error: {str(e)}")
+        return {"error": str(e)}
+
+
+def validate_response(response: Dict[str, Any]) -> bool:
+    """Validate the response structure"""
+    required_fields = ["id", "created", "model", "object", "status", "tool_calls"]
+    
+    missing_fields = [field for field in required_fields if field not in response]
+    
+    if missing_fields:
+        log_error(f"Response missing required fields: {', '.join(missing_fields)}")
+        return False
+    
+    if response["object"] != "response":
+        log_error(f"Expected 'object' to be 'response', got '{response['object']}'")
+        return False
+    
+    if not isinstance(response["tool_calls"], list):
+        log_error(f"Expected 'tool_calls' to be a list, got {type(response['tool_calls'])}")
+        return False
+    
+    for i, tool_call in enumerate(response["tool_calls"]):
+        if "id" not in tool_call or "function" not in tool_call or "type" not in tool_call:
+            log_error(f"Tool call {i} missing required fields")
+            return False
+        
+        if "name" not in tool_call["function"] or "arguments" not in tool_call["function"]:
+            log_error(f"Tool call {i} function missing required fields")
+            return False
+    
+    return True
+
+
+async def test_search_function(token: Optional[str] = None) -> bool:
+    """Test the search function via the responses API"""
+    log_info("\n--- Testing search function ---")
+    
+    # Define request payload
+    payload = {
+        "model": "cognee-v1",
+        "input": "What information do we have about Python's asyncio module?",
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "search",
+                    "description": "Search for information within the knowledge graph",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "search_query": {
+                                "type": "string",
+                                "description": "The query to search for in the knowledge graph"
+                            },
+                            "search_type": {
+                                "type": "string",
+                                "description": "Type of search to perform",
+                                "enum": ["INSIGHTS", "CODE", "GRAPH_COMPLETION", "SEMANTIC", "NATURAL_LANGUAGE"]
+                            }
+                        },
+                        "required": ["search_query"]
+                    }
+                }
+            }
+        ],
+        "tool_choice": "auto"
+    }
+    
+    result = await make_api_request(payload, token)
+    
+    if "error" in result:
+        return False
+    
+    if not validate_response(result):
+        return False
+    
+    # Check if we got tool calls
+    if not result["tool_calls"]:
+        log_warning("No tool calls found in response")
+        return False
+    
+    search_tool_calls = [tc for tc in result["tool_calls"] 
+                        if tc["function"]["name"] == "search"]
+    
+    if not search_tool_calls:
+        log_error("No search tool calls found in response")
+        return False
+    
+    log_success("Search function test passed")
+    return True
+
+
+async def test_cognify_function(token: Optional[str] = None) -> bool:
+    """Test the cognify_text function via the responses API"""
+    log_info("\n--- Testing cognify_text function ---")
+    
+    # Define request payload
+    payload = {
+        "model": "cognee-v1",
+        "input": "Please add this information to the knowledge graph: Python's asyncio module provides infrastructure for writing single-threaded concurrent code using coroutines.",
+        "tools": [
+            {
+                "type": "function",
+                "function": {
+                    "name": "cognify_text",
+                    "description": "Convert text into a knowledge graph",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "text": {
+                                "type": "string",
+                                "description": "Text content to be converted into a knowledge graph"
+                            }
+                        },
+                        "required": ["text"]
+                    }
+                }
+            }
+        ],
+        "tool_choice": "auto"
+    }
+    
+    result = await make_api_request(payload, token)
+    
+    if "error" in result:
+        return False
+    
+    if not validate_response(result):
+        return False
+    
+    # Check if we got tool calls
+    if not result["tool_calls"]:
+        log_warning("No tool calls found in response")
+        return False
+    
+    cognify_tool_calls = [tc for tc in result["tool_calls"] 
+                         if tc["function"]["name"] == "cognify_text"]
+    
+    if not cognify_tool_calls:
+        log_error("No cognify_text tool calls found in response")
+        return False
+    
+    # Check if output is successful
+    for tool_call in cognify_tool_calls:
+        if "output" in tool_call:
+            output = tool_call["output"]
+            if output.get("status") != "success":
+                log_error(f"Cognify operation failed: {output}")
+                return False
+    
+    log_success("Cognify function test passed")
+    return True
+
+
+async def test_with_default_tools(token: Optional[str] = None) -> bool:
+    """Test using the default tools provided by the API"""
+    log_info("\n--- Testing with default tools ---")
+    
+    # Define request payload - omitting tools to use defaults
+    payload = {
+        "model": "cognee-v1",
+        "input": "What can I do with this API?",
+        "tool_choice": "auto"
+    }
+    
+    result = await make_api_request(payload, token)
+    
+    if "error" in result:
+        return False
+    
+    if not validate_response(result):
+        return False
+    
+    log_success("Default tools test passed")
+    return True
+
+
+async def test_invalid_request(token: Optional[str] = None) -> bool:
+    """Test handling of invalid requests"""
+    log_info("\n--- Testing invalid request handling ---")
+    
+    # Missing required parameter (model)
+    payload = {
+        "input": "What can I do with this API?"
+    }
+    
+    result = await make_api_request(payload, token, expected_status=422)
+    
+    if "status_code" in result and result["status_code"] == 422:
+        log_success("Invalid request properly rejected")
+        return True
+    else:
+        log_error("Invalid request not properly rejected")
+        return False
+
+
+async def main():
+    """Run all tests"""
+    log_info("Starting Cognee Responses API Tests")
+    
+    # Get authentication token
+    token = await authenticate()
+    
+    # Run tests
+    results = {}
+    
+    # Basic functionality
+    results["search_function"] = await test_search_function(token)
+    results["cognify_function"] = await test_cognify_function(token)
+    results["default_tools"] = await test_with_default_tools(token)
+    
+    # Error handling
+    results["invalid_request"] = await test_invalid_request(token)
+    
+    # Summary
+    print("\n" + "="*50)
+    print("TEST RESULTS SUMMARY")
+    print("="*50)
+    
+    passed = sum(1 for result in results.values() if result)
+    total = len(results)
+    
+    for test_name, result in results.items():
+        status = f"{GREEN}PASSED{RESET}" if result else f"{RED}FAILED{RESET}"
+        print(f"{test_name.replace('_', ' ').title()}: {status}")
+    
+    print("-"*50)
+    print(f"Tests passed: {passed}/{total} ({100 * passed / total:.1f}%)")
+    
+    if passed == total:
+        log_success("\nAll tests passed! The OpenAI-compatible Responses API is working correctly.")
+        return 0
+    else:
+        log_error("\nSome tests failed. Please check the logs for details.")
+        return 1
+
+
+if __name__ == "__main__":
+    exit_code = asyncio.run(main())
+    sys.exit(exit_code)