openrag/src/config/settings.py
import os
import asyncio
import time

import requests
from dotenv import load_dotenv
from openai import AsyncOpenAI
from opensearchpy import AsyncOpenSearch
from opensearchpy._async.http_aiohttp import AIOHttpConnection
from docling.document_converter import DocumentConverter
from agentd.patch import patch_openai_with_mcp

from utils.logging_config import get_logger

logger = get_logger(__name__)

load_dotenv()
load_dotenv("../.env")  # also pick up a .env from the parent directory, if present

# Environment variables
OPENSEARCH_HOST = os.getenv("OPENSEARCH_HOST", "localhost")
OPENSEARCH_PORT = int(os.getenv("OPENSEARCH_PORT", "9200"))
OPENSEARCH_USERNAME = os.getenv("OPENSEARCH_USERNAME", "admin")
OPENSEARCH_PASSWORD = os.getenv("OPENSEARCH_PASSWORD")
LANGFLOW_URL = os.getenv("LANGFLOW_URL", "http://localhost:7860")
# Optional: public URL for browser links (e.g., http://localhost:7860)
LANGFLOW_PUBLIC_URL = os.getenv("LANGFLOW_PUBLIC_URL")
FLOW_ID = os.getenv("FLOW_ID")
# Langflow superuser credentials for API key generation
LANGFLOW_SUPERUSER = os.getenv("LANGFLOW_SUPERUSER")
LANGFLOW_SUPERUSER_PASSWORD = os.getenv("LANGFLOW_SUPERUSER_PASSWORD")
# Allow explicit key via environment; generation will be skipped if set
LANGFLOW_KEY = os.getenv("LANGFLOW_KEY")
SESSION_SECRET = os.getenv("SESSION_SECRET", "your-secret-key-change-in-production")
GOOGLE_OAUTH_CLIENT_ID = os.getenv("GOOGLE_OAUTH_CLIENT_ID")
GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")


def is_no_auth_mode():
    """Check if we're running in no-auth mode (OAuth credentials missing)."""
    result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
    logger.debug(
        "Checking auth mode",
        no_auth_mode=result,
        has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None,
        has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None,
    )
    return result


# Webhook configuration - must be set to enable webhooks
WEBHOOK_BASE_URL = os.getenv("WEBHOOK_BASE_URL")  # No default - must be explicitly configured

# OpenSearch configuration
INDEX_NAME = "documents"
VECTOR_DIM = 1536
EMBED_MODEL = "text-embedding-3-small"
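
# VECTOR_DIM matches the default output dimensionality of text-embedding-3-small.
# Each indexed record is one document chunk: its text, a knn_vector embedding
# (jvector engine, disk_ann method, L2 distance), ownership/permission fields,
# source metadata, and created/modified/indexed timestamps.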
INDEX_BODY = {
    "settings": {
        "index": {"knn": True},
        "number_of_shards": 1,
        "number_of_replicas": 1,
    },
    "mappings": {
        "properties": {
            "document_id": {"type": "keyword"},
            "filename": {"type": "keyword"},
            "mimetype": {"type": "keyword"},
            "page": {"type": "integer"},
            "text": {"type": "text"},
            "chunk_embedding": {
                "type": "knn_vector",
                "dimension": VECTOR_DIM,
                "method": {
                    "name": "disk_ann",
                    "engine": "jvector",
                    "space_type": "l2",
                    "parameters": {"ef_construction": 100, "m": 16},
                },
            },
            "source_url": {"type": "keyword"},
            "connector_type": {"type": "keyword"},
            "owner": {"type": "keyword"},
            "allowed_users": {"type": "keyword"},
            "allowed_groups": {"type": "keyword"},
            "user_permissions": {"type": "object"},
            "group_permissions": {"type": "object"},
            "created_time": {"type": "date"},
            "modified_time": {"type": "date"},
            "indexed_time": {"type": "date"},
            "metadata": {"type": "object"},
        }
    },
}
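
# The index itself is created elsewhere in the app. A minimal sketch, assuming the
# standard opensearch-py async API and an already-initialized `clients.opensearch`:
#
#     if not await clients.opensearch.indices.exists(index=INDEX_NAME):
#         await clients.opensearch.indices.create(index=INDEX_NAME, body=INDEX_BODY)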


async def generate_langflow_api_key():
    """Generate Langflow API key using superuser credentials at startup"""
    global LANGFLOW_KEY

    # If key already provided via env, do not attempt generation
    if LANGFLOW_KEY:
        logger.info("Using LANGFLOW_KEY from environment, skipping generation")
        return LANGFLOW_KEY

    if not LANGFLOW_SUPERUSER or not LANGFLOW_SUPERUSER_PASSWORD:
        logger.warning(
            "LANGFLOW_SUPERUSER and LANGFLOW_SUPERUSER_PASSWORD not set, skipping API key generation"
        )
        return None

    try:
        logger.info("Generating Langflow API key using superuser credentials")
        # Langflow may still be starting up, so retry the login + key creation sequence
        max_attempts = int(os.getenv("LANGFLOW_KEY_RETRIES", "15"))
        delay_seconds = float(os.getenv("LANGFLOW_KEY_RETRY_DELAY", "2.0"))
        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                # Login to get access token
                login_response = requests.post(
                    f"{LANGFLOW_URL}/api/v1/login",
                    headers={"Content-Type": "application/x-www-form-urlencoded"},
                    data={
                        "username": LANGFLOW_SUPERUSER,
                        "password": LANGFLOW_SUPERUSER_PASSWORD,
                    },
                    timeout=10,
                )
                login_response.raise_for_status()
                access_token = login_response.json().get("access_token")
                if not access_token:
                    raise KeyError("access_token")

                # Create API key
                api_key_response = requests.post(
                    f"{LANGFLOW_URL}/api/v1/api_key/",
                    headers={
                        "Content-Type": "application/json",
                        "Authorization": f"Bearer {access_token}",
                    },
                    json={"name": "openrag-auto-generated"},
                    timeout=10,
                )
                api_key_response.raise_for_status()
                api_key = api_key_response.json().get("api_key")
                if not api_key:
                    raise KeyError("api_key")

                LANGFLOW_KEY = api_key
                logger.info(
                    "Successfully generated Langflow API key",
                    api_key_preview=api_key[:8],
                )
                return api_key
            except (requests.exceptions.RequestException, KeyError) as e:
                last_error = e
                logger.warning(
                    "Attempt to generate Langflow API key failed",
                    attempt=attempt,
                    max_attempts=max_attempts,
                    error=str(e),
                )
                if attempt < max_attempts:
                    time.sleep(delay_seconds)
                else:
                    raise
    except requests.exceptions.RequestException as e:
        logger.error("Failed to generate Langflow API key", error=str(e))
        return None
    except KeyError as e:
        logger.error("Unexpected response format from Langflow", missing_field=str(e))
        return None
    except Exception as e:
        logger.error("Unexpected error generating Langflow API key", error=str(e))
        return None


class AppClients:
    """Container for the shared clients used across the app (OpenSearch, Langflow, OpenAI, Docling)."""

    def __init__(self):
        self.opensearch = None
        self.langflow_client = None
        self.patched_async_client = None
        self.converter = None

    async def initialize(self):
        """Generate the Langflow API key (if needed) and construct the shared clients."""
        # Generate Langflow API key first
        await generate_langflow_api_key()

        # Initialize OpenSearch client
        self.opensearch = AsyncOpenSearch(
            hosts=[{"host": OPENSEARCH_HOST, "port": OPENSEARCH_PORT}],
            connection_class=AIOHttpConnection,
            scheme="https",
            use_ssl=True,
            verify_certs=False,
            ssl_assert_fingerprint=None,
            http_auth=(OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD),
            http_compress=True,
        )

        # Initialize Langflow client with generated/provided API key
        if LANGFLOW_KEY and self.langflow_client is None:
            try:
                self.langflow_client = AsyncOpenAI(
                    base_url=f"{LANGFLOW_URL}/api/v1", api_key=LANGFLOW_KEY
                )
            except Exception as e:
                logger.warning("Failed to initialize Langflow client", error=str(e))
                self.langflow_client = None
        if self.langflow_client is None:
            logger.warning(
                "No Langflow client initialized yet, will attempt later on first use"
            )

        # Initialize patched OpenAI client
        self.patched_async_client = patch_openai_with_mcp(AsyncOpenAI())

        # Initialize document converter
        self.converter = DocumentConverter()
        return self

    async def ensure_langflow_client(self):
        """Ensure Langflow client exists; try to generate key and create client lazily."""
        if self.langflow_client is not None:
            return self.langflow_client

        # Try generating key again (with retries)
        await generate_langflow_api_key()
        if LANGFLOW_KEY and self.langflow_client is None:
            try:
                self.langflow_client = AsyncOpenAI(
                    base_url=f"{LANGFLOW_URL}/api/v1", api_key=LANGFLOW_KEY
                )
                logger.info("Langflow client initialized on-demand")
            except Exception as e:
                logger.error(
                    "Failed to initialize Langflow client on-demand", error=str(e)
                )
                self.langflow_client = None
        return self.langflow_client

    def create_user_opensearch_client(self, jwt_token: str):
        """Create OpenSearch client with user's JWT token for OIDC auth"""
        headers = {"Authorization": f"Bearer {jwt_token}"}
        return AsyncOpenSearch(
            hosts=[{"host": OPENSEARCH_HOST, "port": OPENSEARCH_PORT}],
            connection_class=AIOHttpConnection,
            scheme="https",
            use_ssl=True,
            verify_certs=False,
            ssl_assert_fingerprint=None,
            headers=headers,
            http_compress=True,
        )


# Global clients instance
clients = AppClients()
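
# Typical startup flow (a sketch; the import path and call sites are assumed, the real
# ones live elsewhere in the application):
#
#     from config.settings import clients, is_no_auth_mode
#
#     await clients.initialize()                        # generate Langflow key, build shared clients
#     langflow = await clients.ensure_langflow_client()
#     user_os = clients.create_user_opensearch_client(jwt_token)  # per-request, user-scoped client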