openrag/docs/openapi/generate_openapi.py
Mendon Kissling 460762e3d0 testing
2026-01-05 15:40:11 -05:00

979 lines
40 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Generate OpenAPI specification from Starlette application routes.
This script:
1. Creates the Starlette app (without starting it)
2. Filters routes to only /v1/ endpoints
3. Generates OpenAPI spec from route paths and methods with detailed schemas
4. Writes to docs/openapi/openapi.json
"""
import asyncio
import json
import os
import re
import sys
from pathlib import Path
# Set environment variable to skip Langflow connection attempts for faster generation.
# It is assigned at import time so it is already in place when `main` is
# imported later, inside generate_openapi_spec().
os.environ["SKIP_LANGFLOW_INIT"] = "1"
# Add src to path so we can import main.
# Three `.parent` hops: this file -> docs/openapi/ -> docs/ -> repo root.
# The path is resolved first because a relative `__file__` (script started via
# a relative path on Python < 3.9) would otherwise collapse to "." and make
# "src" resolve against the current working directory instead of the repo.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "src"))
# Description shared by every documented 401 response.
_UNAUTHORIZED = "Unauthorized - Invalid or missing API key"


def _prop(type_, description=None, **extra):
    """Build one JSON-schema property.

    Keys are emitted as ``type``, then *extra* keywords in call order
    (``default``, ``enum``, ``items``, ``properties``, ``format``), then
    ``description`` when given, so the serialized key order stays stable.
    """
    prop = {"type": type_, **extra}
    if description is not None:
        prop["description"] = description
    return prop


def _object_schema(properties, required=None):
    """Build an ``object`` schema; ``required`` is omitted when not given."""
    schema = {"type": "object"}
    if required is not None:
        schema["required"] = required
    schema["properties"] = properties
    return schema


def _json_content(schema, example=None):
    """Wrap a schema (and optional example) as an ``application/json`` content map."""
    media = {"schema": schema}
    if example is not None:
        media["example"] = example
    return {"application/json": media}


def _json_body(properties, required=None, example=None):
    """Build a required JSON request body whose schema is an object."""
    return {
        "required": True,
        "content": _json_content(_object_schema(properties, required), example),
    }


def _json_response(description, properties, example=None):
    """Build a response entry whose JSON payload is an object with *properties*."""
    return {
        "description": description,
        "content": _json_content(_object_schema(properties), example),
    }


def _error_response(description, message=None):
    """Build an ``{"error": <string>}`` response; *message* becomes the example."""
    example = None if message is None else {"error": message}
    return _json_response(description, {"error": _prop("string")}, example)


def _chat_send():
    """Schema for POST /v1/chat."""
    ok = _json_response(
        "Chat response (non-streaming) or streaming response (when stream=true)",
        {
            "response": _prop("string"),
            "chat_id": _prop("string"),
            "sources": _prop("array", items=_prop("object")),
        },
        example={
            "response": "RAG stands for Retrieval-Augmented Generation...",
            "chat_id": "abc123",
            "sources": [],
        },
    )
    # The same 200 response can also be delivered as a Server-Sent Events stream.
    ok["content"]["text/event-stream"] = {
        "schema": _prop("string", "Server-Sent Events stream")
    }
    return {
        "summary": "Send a chat message",
        "description": "Send a chat message via Langflow. Supports both streaming and non-streaming responses.",
        "requestBody": _json_body(
            {
                "message": _prop("string", "The chat message"),
                "chat_id": _prop("string", "Optional chat ID to continue a conversation"),
                "stream": _prop("boolean", "Whether to stream the response", default=False),
                "filters": _prop("object", "Optional search filters"),
                "limit": _prop("integer", "Maximum number of search results", default=10),
                "score_threshold": _prop("number", "Minimum relevance score", default=0),
                "filter_id": _prop("string", "Optional knowledge filter ID"),
            },
            required=["message"],
            example={"message": "What is RAG?", "stream": False},
        ),
        "responses": {
            "200": ok,
            "400": _error_response("Bad request", "Message is required"),
            "401": _error_response(_UNAUTHORIZED, "Invalid API key"),
        },
    }


def _chat_list():
    """Schema for GET /v1/chat."""
    return {
        "summary": "List conversations",
        "description": "List all conversations for the authenticated user.",
        "responses": {
            "200": _json_response(
                "List of conversations",
                {
                    "conversations": _prop(
                        "array",
                        items=_object_schema({
                            "chat_id": _prop("string"),
                            "title": _prop("string"),
                            "created_at": _prop("string"),
                            "last_activity": _prop("string"),
                            "message_count": _prop("integer"),
                        }),
                    )
                },
                example={
                    "conversations": [{
                        "chat_id": "abc123",
                        "title": "What is RAG?",
                        "created_at": "2024-01-01T00:00:00Z",
                        "last_activity": "2024-01-01T00:00:00Z",
                        "message_count": 5,
                    }]
                },
            ),
            "401": _error_response(_UNAUTHORIZED),
        },
    }


def _chat_get():
    """Schema for GET /v1/chat/<id>."""
    return {
        "summary": "Get conversation",
        "description": "Get a specific conversation with full message history.",
        "responses": {
            "200": _json_response(
                "Conversation details",
                {
                    "chat_id": _prop("string"),
                    "title": _prop("string"),
                    "created_at": _prop("string"),
                    "last_activity": _prop("string"),
                    "messages": _prop(
                        "array",
                        items=_object_schema({
                            "role": _prop("string", enum=["user", "assistant"]),
                            "content": _prop("string"),
                            "timestamp": _prop("string"),
                        }),
                    ),
                },
            ),
            "404": _error_response("Conversation not found", "Conversation not found"),
        },
    }


def _chat_delete():
    """Schema for DELETE /v1/chat/<id>."""
    return {
        "summary": "Delete conversation",
        "description": "Delete a conversation.",
        "responses": {
            "200": _json_response(
                "Conversation deleted successfully",
                {"success": _prop("boolean")},
                example={"success": True},
            )
        },
    }


def _search():
    """Schema for POST /v1/search."""
    return {
        "summary": "Semantic search",
        "description": "Perform semantic search on documents.",
        "requestBody": _json_body(
            {
                "query": _prop("string", "Search query"),
                "filters": _prop(
                    "object",
                    "Optional search filters",
                    properties={
                        "data_sources": _prop("array", items=_prop("string")),
                        "document_types": _prop("array", items=_prop("string")),
                    },
                ),
                "limit": _prop("integer", "Maximum number of results", default=10),
                "score_threshold": _prop("number", "Minimum relevance score", default=0),
            },
            required=["query"],
            example={"query": "What is RAG?", "limit": 10, "score_threshold": 0.5},
        ),
        "responses": {
            "200": _json_response(
                "Search results",
                {
                    "results": _prop(
                        "array",
                        items=_object_schema({
                            "filename": _prop("string"),
                            "text": _prop("string"),
                            "score": _prop("number"),
                            "page": _prop("integer"),
                            "mimetype": _prop("string"),
                        }),
                    )
                },
                example={
                    "results": [{
                        "filename": "doc.pdf",
                        "text": "RAG stands for...",
                        "score": 0.85,
                        "page": 1,
                        "mimetype": "application/pdf",
                    }]
                },
            ),
            "400": _error_response("Bad request", "Query is required"),
        },
    }


def _document_ingest():
    """Schema for POST /v1/documents/ingest (multipart upload)."""
    return {
        "summary": "Ingest document",
        "description": "Ingest a document into the knowledge base. Supports both async (via Langflow) and sync processing.",
        "requestBody": {
            "required": True,
            "content": {
                "multipart/form-data": {
                    "schema": _object_schema(
                        {"file": _prop("string", "Document file to ingest", format="binary")},
                        required=["file"],
                    )
                }
            },
        },
        "responses": {
            "200": _json_response(
                "Document ingestion initiated",
                {
                    "task_id": _prop("string", "Task ID for async processing"),
                    "status": _prop("string", enum=["processing"]),
                    "filename": _prop("string"),
                    "success": _prop("boolean", "Success flag for sync processing"),
                    "document_id": _prop("string", "Document ID for sync processing"),
                    "chunks": _prop("integer", "Number of chunks created"),
                },
            )
        },
    }


def _task_get():
    """Schema for GET /v1/tasks/<id>."""
    return {
        "summary": "Get task status",
        "description": "Get the status of an ingestion task.",
        "responses": {
            "200": _json_response(
                "Task status",
                {
                    "task_id": _prop("string"),
                    "status": _prop(
                        "string",
                        enum=["pending", "processing", "completed", "failed"],
                    ),
                    "total_files": _prop("integer"),
                    "processed_files": _prop("integer"),
                    "successful_files": _prop("integer"),
                    "failed_files": _prop("integer"),
                    "files": _prop("object"),
                },
            ),
            "404": _error_response("Task not found", "Task not found"),
        },
    }


def _document_delete():
    """Schema for DELETE /v1/documents."""
    return {
        "summary": "Delete document",
        "description": "Delete a document from the knowledge base.",
        "requestBody": _json_body(
            {"filename": _prop("string", "Filename of the document to delete")},
            required=["filename"],
            example={"filename": "doc.pdf"},
        ),
        "responses": {
            "200": _json_response(
                "Document deleted successfully",
                {"success": _prop("boolean"), "deleted_chunks": _prop("integer")},
                example={"success": True, "deleted_chunks": 5},
            )
        },
    }


def _settings_get():
    """Schema for GET /v1/settings."""
    return {
        "summary": "Get settings",
        "description": "Get current OpenRAG configuration (read-only). Sensitive information is never exposed.",
        "responses": {
            "200": _json_response(
                "Current settings",
                {
                    "agent": _object_schema({
                        "llm_provider": _prop("string"),
                        "llm_model": _prop("string"),
                    }),
                    "knowledge": _object_schema({
                        "embedding_provider": _prop("string"),
                        "embedding_model": _prop("string"),
                        "chunk_size": _prop("integer"),
                        "chunk_overlap": _prop("integer"),
                    }),
                },
                example={
                    "agent": {"llm_provider": "openai", "llm_model": "gpt-4"},
                    "knowledge": {
                        "embedding_provider": "openai",
                        "embedding_model": "text-embedding-3-small",
                        "chunk_size": 1000,
                        "chunk_overlap": 200,
                    },
                },
            )
        },
    }


def _settings_update():
    """Schema for POST /v1/settings."""
    return {
        "summary": "Update settings",
        "description": "Update OpenRAG configuration settings. Only a limited subset of settings can be updated.",
        "requestBody": _json_body(
            {
                "chunk_size": _prop("integer", "Chunk size for document processing"),
                "chunk_overlap": _prop("integer", "Chunk overlap for document processing"),
            },
            example={"chunk_size": 1000, "chunk_overlap": 200},
        ),
        "responses": {
            "200": _json_response(
                "Settings updated successfully",
                {"message": _prop("string")},
                example={"message": "Configuration updated successfully"},
            )
        },
    }


def _filter_create():
    """Schema for POST /v1/knowledge-filters."""
    return {
        "summary": "Create knowledge filter",
        "description": "Create a new knowledge filter.",
        "requestBody": _json_body(
            {
                "name": _prop("string", "Name of the knowledge filter"),
                "description": _prop("string", "Description of the knowledge filter"),
                "queryData": _prop("object", "Query data for the filter"),
                "allowedUsers": _prop(
                    "array", "List of allowed user IDs", items=_prop("string")
                ),
                "allowedGroups": _prop(
                    "array", "List of allowed group IDs", items=_prop("string")
                ),
            },
            required=["name", "queryData"],
        ),
        "responses": {
            "201": _json_response(
                "Knowledge filter created",
                {"success": _prop("boolean"), "filter": _prop("object")},
            )
        },
    }


def _filter_search():
    """Schema for POST /v1/knowledge-filters/search."""
    return {
        "summary": "Search knowledge filters",
        "description": "Search for knowledge filters by name, description, or query content.",
        "requestBody": _json_body(
            {
                "query": _prop("string", "Search query"),
                "limit": _prop("integer", "Maximum number of results", default=20),
            }
        ),
        "responses": {
            "200": _json_response(
                "List of matching knowledge filters",
                {
                    "success": _prop("boolean"),
                    "filters": _prop("array", items=_prop("object")),
                },
            )
        },
    }


def _filter_get():
    """Schema for GET /v1/knowledge-filters/<id>."""
    return {
        "summary": "Get knowledge filter",
        "description": "Get a specific knowledge filter by ID.",
        "responses": {
            "200": _json_response(
                "Knowledge filter details",
                {"success": _prop("boolean"), "filter": _prop("object")},
            ),
            "404": _error_response("Knowledge filter not found"),
        },
    }


def _filter_update():
    """Schema for PUT /v1/knowledge-filters/<id>."""
    return {
        "summary": "Update knowledge filter",
        "description": "Update a knowledge filter.",
        "requestBody": _json_body(
            {
                "name": _prop("string"),
                "description": _prop("string"),
                "queryData": _prop("object"),
            }
        ),
        "responses": {
            "200": _json_response(
                "Knowledge filter updated",
                {"success": _prop("boolean"), "filter": _prop("object")},
            )
        },
    }


def _filter_delete():
    """Schema for DELETE /v1/knowledge-filters/<id>."""
    return {
        "summary": "Delete knowledge filter",
        "description": "Delete a knowledge filter.",
        "responses": {
            "200": _json_response(
                "Knowledge filter deleted",
                {"success": _prop("boolean")},
                example={"success": True},
            )
        },
    }


# Ordered (path-or-prefix, HTTP method, match-by-prefix?, builder) rules.
# The first matching rule wins, so exact paths such as
# "/v1/knowledge-filters/search" must stay ahead of the prefix rules that
# would otherwise also cover them.
_ENDPOINT_RULES = (
    # Chat endpoints
    ("/v1/chat", "POST", False, _chat_send),
    ("/v1/chat", "GET", False, _chat_list),
    ("/v1/chat/", "GET", True, _chat_get),
    ("/v1/chat/", "DELETE", True, _chat_delete),
    # Search endpoint
    ("/v1/search", "POST", False, _search),
    # Documents endpoints
    ("/v1/documents/ingest", "POST", False, _document_ingest),
    ("/v1/tasks/", "GET", True, _task_get),
    ("/v1/documents", "DELETE", False, _document_delete),
    # Settings endpoints
    ("/v1/settings", "GET", False, _settings_get),
    ("/v1/settings", "POST", False, _settings_update),
    # Knowledge filters endpoints
    ("/v1/knowledge-filters", "POST", False, _filter_create),
    ("/v1/knowledge-filters/search", "POST", False, _filter_search),
    ("/v1/knowledge-filters/", "GET", True, _filter_get),
    ("/v1/knowledge-filters/", "PUT", True, _filter_update),
    ("/v1/knowledge-filters/", "DELETE", True, _filter_delete),
)


def get_endpoint_schema(path, method):
    """Get detailed schema for a specific endpoint based on path and method.

    Args:
        path: Route path, e.g. ``"/v1/chat"`` or ``"/v1/chat/{chat_id}"``.
        method: HTTP method name; matched case-insensitively.

    Returns:
        A freshly built dict of OpenAPI operation fields (``summary``,
        ``description``, optional ``requestBody``, ``responses``), or ``None``
        when no detailed schema is registered for the endpoint.
    """
    verb = method.upper()
    for pattern, rule_method, is_prefix, build in _ENDPOINT_RULES:
        if rule_method != verb:
            continue
        matched = path.startswith(pattern) if is_prefix else path == pattern
        if matched:
            return build()
    # Default fallback for unknown endpoints
    return None
# Matches a Starlette path parameter with or without a convertor suffix
# (``{chat_id}`` or ``{chat_id:str}``); group 1 is the bare parameter name.
_PATH_PARAM_RE = re.compile(r"\{(\w+)(?::\w+)?\}")


def _fallback_error(description):
    """Build the generic ``{"error": <string>}`` response used by fallback operations."""
    return {
        "description": description,
        "content": {
            "application/json": {
                "schema": {
                    "type": "object",
                    "properties": {
                        "error": {"type": "string"}
                    }
                }
            }
        }
    }


def _build_operation(path, method):
    """Build the OpenAPI operation object for one (path, upper-case method) pair."""
    operation_id = f"{method.lower()}{path.replace('/', '_').replace('{', '').replace('}', '')}"
    # Get detailed schema for this endpoint
    endpoint_schema = get_endpoint_schema(path, method)
    if endpoint_schema:
        # Use the detailed schema
        operation = {"operationId": operation_id, **endpoint_schema}
    else:
        # Fallback to basic schema
        operation = {
            "summary": f"{method} {path}",
            "operationId": operation_id,
            "responses": {
                "200": {
                    "description": "Success",
                    "content": {
                        "application/json": {
                            "schema": {"type": "object"}
                        }
                    }
                },
                "400": _fallback_error("Bad request"),
                "401": _fallback_error("Unauthorized - Invalid or missing API key"),
            }
        }
        # Add request body for POST/PUT methods
        if method in ("POST", "PUT"):
            operation["requestBody"] = {
                "required": True,
                "content": {
                    "application/json": {
                        "schema": {"type": "object"}
                    }
                }
            }
    # Add path parameters if any
    param_names = _PATH_PARAM_RE.findall(path)
    if param_names:
        operation.setdefault("parameters", []).extend(
            {
                "name": param,
                "in": "path",
                "required": True,
                "schema": {"type": "string"},
                "description": f"The {param.replace('_', ' ')}"
            }
            for param in param_names
        )
    return operation


async def generate_openapi_spec():
    """Generate OpenAPI spec from Starlette app routes.

    Creates the app, keeps only routes whose path starts with ``/v1/``, builds
    one operation per route method (``HEAD`` excluded) and writes the result
    to ``openapi.json`` next to this script. Prints a warning and returns
    without writing anything when no ``/v1/`` route is found.
    """
    # Import here to avoid issues with async initialization
    from main import create_app
    # Create the app (this initializes services)
    app = await create_app()
    # Filter routes to only /v1/ endpoints; some route types have no `path`.
    v1_routes = [
        route for route in app.routes
        if getattr(route, "path", "").startswith("/v1/")
    ]
    if not v1_routes:
        print("Warning: No /v1/ routes found!")
        return
    print(f"Found {len(v1_routes)} /v1/ routes")
    # Generate paths from routes
    paths = {}
    for route in v1_routes:
        # OpenAPI path templates only allow ``{name}``, so drop any Starlette
        # convertor suffix (``{name:int}`` -> ``{name}``).
        path = _PATH_PARAM_RE.sub(r"{\1}", route.path)
        # `methods` is a set (or None/absent on non-HTTP routes). Sorting keeps
        # the generated file identical from run to run. HEAD is excluded.
        methods = sorted(
            m for m in (getattr(route, "methods", None) or ()) if m != "HEAD"
        )
        if not methods:
            # Nothing to document; avoids emitting an empty path item.
            continue
        operations = paths.setdefault(path, {})
        for method in methods:
            operations[method.lower()] = _build_operation(path, method)
            print(f" + {method} {path}")
    # Base OpenAPI info
    base_schema = {
        "openapi": "3.0.3",
        "info": {
            "title": "OpenRAG API v1",
            "description": "OpenRAG Public API v1 provides a clean, versioned interface for external integrations.\nAll endpoints require API key authentication via the `X-API-Key` header.",
            "version": "1.0.0",
            "contact": {
                "name": "OpenRAG Support",
                "url": "https://github.com/langflow-ai/openrag",
            },
        },
        "servers": [
            {"url": "http://localhost:8000", "description": "Local development server"},
            {"url": "https://api.openr.ag", "description": "Production server"},
        ],
        "components": {
            "securitySchemes": {
                "ApiKeyAuth": {
                    "type": "apiKey",
                    "in": "header",
                    "name": "X-API-Key",
                    "description": "API key for authentication",
                }
            }
        },
        "security": [{"ApiKeyAuth": []}],
        "paths": paths,
    }
    # Write to file (same directory as script)
    output_path = Path(__file__).parent / "openapi.json"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(base_schema, f, indent=2)
    path_count = len(paths)
    print(f"\nGenerated OpenAPI spec with {path_count} paths")
    print(f" Written to: {output_path}")
    print("\nTip: You can enhance this spec by adding detailed descriptions,")
    print(" request/response schemas, and examples to the endpoint docstrings.")
# Script entry point: run the async generator to completion when this file is
# executed directly; importing the module does not trigger generation.
if __name__ == "__main__":
    asyncio.run(generate_openapi_spec())