centralized logging

parent 68fd7649e6
commit a52a842df9
17 changed files with 313 additions and 266 deletions
@@ -26,6 +26,7 @@ dependencies = [
 "textual>=0.45.0",
 "python-dotenv>=1.0.0",
 "textual-fspicker>=0.6.0",
+"structlog>=25.4.0",
 ]

 [tool.uv.sources]
src/agent.py (102 changed lines)

@@ -1,3 +1,7 @@
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)
+
 # User-scoped conversation state - keyed by user_id -> response_id -> conversation
 user_conversations = {} # user_id -> {response_id: {"messages": [...], "previous_response_id": parent_id, "created_at": timestamp, "last_activity": timestamp}}

@@ -65,7 +69,7 @@ async def async_response_stream(
 previous_response_id: str = None,
 log_prefix: str = "response",
 ):
-print(f"user ==> {prompt}")
+logger.info("User prompt received", prompt=prompt)

 try:
 # Build request parameters

@@ -91,7 +95,7 @@ async def async_response_stream(
 chunk_count = 0
 async for chunk in response:
 chunk_count += 1
-print(f"[DEBUG] Chunk {chunk_count}: {chunk}")
+logger.debug("Stream chunk received", chunk_count=chunk_count, chunk=str(chunk))

 # Yield the raw event as JSON for the UI to process
 import json

@@ -125,7 +129,7 @@ async def async_response_stream(
 yield (json.dumps(chunk_data, default=str) + "\n").encode("utf-8")
 except Exception as e:
 # Fallback to string representation
-print(f"[DEBUG] JSON serialization failed: {e}")
+logger.warning("JSON serialization failed", error=str(e))
 yield (
 json.dumps(
 {"error": f"Serialization failed: {e}", "raw": str(chunk)}

@@ -133,11 +137,11 @@ async def async_response_stream(
 + "\n"
 ).encode("utf-8")

-print(f"[DEBUG] Stream complete. Total chunks: {chunk_count}")
-print(f"{log_prefix} ==> {full_response}")
+logger.debug("Stream complete", total_chunks=chunk_count)
+logger.info("Response generated", log_prefix=log_prefix, response=full_response)

 except Exception as e:
-print(f"[ERROR] Exception in streaming: {e}")
+logger.error("Exception in streaming", error=str(e))
 import traceback

 traceback.print_exc()

@@ -153,7 +157,7 @@ async def async_response(
 previous_response_id: str = None,
 log_prefix: str = "response",
 ):
-print(f"user ==> {prompt}")
+logger.info("User prompt received", prompt=prompt)

 # Build request parameters
 request_params = {

@@ -170,7 +174,7 @@ async def async_response(
 response = await client.responses.create(**request_params)

 response_text = response.output_text
-print(f"{log_prefix} ==> {response_text}")
+logger.info("Response generated", log_prefix=log_prefix, response=response_text)

 # Extract and store response_id if available
 response_id = getattr(response, "id", None) or getattr(

@@ -227,7 +231,7 @@ async def async_langflow_stream(
 extra_headers: dict = None,
 previous_response_id: str = None,
 ):
-print(f"[DEBUG] Starting langflow stream for prompt: {prompt}")
+logger.debug("Starting langflow stream", prompt=prompt)
 try:
 async for chunk in async_stream(
 langflow_client,

@@ -237,11 +241,11 @@ async def async_langflow_stream(
 previous_response_id=previous_response_id,
 log_prefix="langflow",
 ):
-print(f"[DEBUG] Yielding chunk from langflow_stream: {chunk[:100]}...")
+logger.debug("Yielding chunk from langflow stream", chunk_preview=chunk[:100].decode('utf-8', errors='replace'))
 yield chunk
-print(f"[DEBUG] Langflow stream completed")
+logger.debug("Langflow stream completed")
 except Exception as e:
-print(f"[ERROR] Exception in langflow_stream: {e}")
+logger.error("Exception in langflow stream", error=str(e))
 import traceback

 traceback.print_exc()

@@ -256,24 +260,18 @@ async def async_chat(
 model: str = "gpt-4.1-mini",
 previous_response_id: str = None,
 ):
-print(
-f"[DEBUG] async_chat called with user_id: {user_id}, previous_response_id: {previous_response_id}"
-)
+logger.debug("async_chat called", user_id=user_id, previous_response_id=previous_response_id)

 # Get the specific conversation thread (or create new one)
 conversation_state = get_conversation_thread(user_id, previous_response_id)
-print(
-f"[DEBUG] Got conversation_state with {len(conversation_state['messages'])} messages"
-)
+logger.debug("Got conversation state", message_count=len(conversation_state['messages']))

 # Add user message to conversation with timestamp
 from datetime import datetime

 user_message = {"role": "user", "content": prompt, "timestamp": datetime.now()}
 conversation_state["messages"].append(user_message)
-print(
-f"[DEBUG] Added user message, now {len(conversation_state['messages'])} messages"
-)
+logger.debug("Added user message", message_count=len(conversation_state['messages']))

 response_text, response_id = await async_response(
 async_client,

@@ -282,9 +280,7 @@ async def async_chat(
 previous_response_id=previous_response_id,
 log_prefix="agent",
 )
-print(
-f"[DEBUG] Got response_text: {response_text[:50]}..., response_id: {response_id}"
-)
+logger.debug("Got response", response_preview=response_text[:50], response_id=response_id)

 # Add assistant response to conversation with response_id and timestamp
 assistant_message = {

@@ -294,25 +290,19 @@ async def async_chat(
 "timestamp": datetime.now(),
 }
 conversation_state["messages"].append(assistant_message)
-print(
-f"[DEBUG] Added assistant message, now {len(conversation_state['messages'])} messages"
-)
+logger.debug("Added assistant message", message_count=len(conversation_state['messages']))

 # Store the conversation thread with its response_id
 if response_id:
 conversation_state["last_activity"] = datetime.now()
 store_conversation_thread(user_id, response_id, conversation_state)
-print(
-f"[DEBUG] Stored conversation thread for user {user_id} with response_id: {response_id}"
-)
+logger.debug("Stored conversation thread", user_id=user_id, response_id=response_id)

 # Debug: Check what's in user_conversations now
 conversations = get_user_conversations(user_id)
-print(
-f"[DEBUG] user_conversations now has {len(conversations)} conversations: {list(conversations.keys())}"
-)
+logger.debug("User conversations updated", user_id=user_id, conversation_count=len(conversations), conversation_ids=list(conversations.keys()))
 else:
-print(f"[DEBUG] WARNING: No response_id received, conversation not stored!")
+logger.warning("No response_id received, conversation not stored")

 return response_text, response_id

@@ -373,9 +363,7 @@ async def async_chat_stream(
 if response_id:
 conversation_state["last_activity"] = datetime.now()
 store_conversation_thread(user_id, response_id, conversation_state)
-print(
-f"Stored conversation thread for user {user_id} with response_id: {response_id}"
-)
+logger.debug("Stored conversation thread", user_id=user_id, response_id=response_id)


 # Async langflow function with conversation storage (non-streaming)

@@ -387,24 +375,18 @@ async def async_langflow_chat(
 extra_headers: dict = None,
 previous_response_id: str = None,
 ):
-print(
-f"[DEBUG] async_langflow_chat called with user_id: {user_id}, previous_response_id: {previous_response_id}"
-)
+logger.debug("async_langflow_chat called", user_id=user_id, previous_response_id=previous_response_id)

 # Get the specific conversation thread (or create new one)
 conversation_state = get_conversation_thread(user_id, previous_response_id)
-print(
-f"[DEBUG] Got langflow conversation_state with {len(conversation_state['messages'])} messages"
-)
+logger.debug("Got langflow conversation state", message_count=len(conversation_state['messages']))

 # Add user message to conversation with timestamp
 from datetime import datetime

 user_message = {"role": "user", "content": prompt, "timestamp": datetime.now()}
 conversation_state["messages"].append(user_message)
-print(
-f"[DEBUG] Added user message to langflow, now {len(conversation_state['messages'])} messages"
-)
+logger.debug("Added user message to langflow", message_count=len(conversation_state['messages']))

 response_text, response_id = await async_response(
 langflow_client,

@@ -414,9 +396,7 @@ async def async_langflow_chat(
 previous_response_id=previous_response_id,
 log_prefix="langflow",
 )
-print(
-f"[DEBUG] Got langflow response_text: {response_text[:50]}..., response_id: {response_id}"
-)
+logger.debug("Got langflow response", response_preview=response_text[:50], response_id=response_id)

 # Add assistant response to conversation with response_id and timestamp
 assistant_message = {

@@ -426,27 +406,19 @@ async def async_langflow_chat(
 "timestamp": datetime.now(),
 }
 conversation_state["messages"].append(assistant_message)
-print(
-f"[DEBUG] Added assistant message to langflow, now {len(conversation_state['messages'])} messages"
-)
+logger.debug("Added assistant message to langflow", message_count=len(conversation_state['messages']))

 # Store the conversation thread with its response_id
 if response_id:
 conversation_state["last_activity"] = datetime.now()
 store_conversation_thread(user_id, response_id, conversation_state)
-print(
-f"[DEBUG] Stored langflow conversation thread for user {user_id} with response_id: {response_id}"
-)
+logger.debug("Stored langflow conversation thread", user_id=user_id, response_id=response_id)

 # Debug: Check what's in user_conversations now
 conversations = get_user_conversations(user_id)
-print(
-f"[DEBUG] user_conversations now has {len(conversations)} conversations: {list(conversations.keys())}"
-)
+logger.debug("User conversations updated", user_id=user_id, conversation_count=len(conversations), conversation_ids=list(conversations.keys()))
 else:
-print(
-f"[DEBUG] WARNING: No response_id received from langflow, conversation not stored!"
-)
+logger.warning("No response_id received from langflow, conversation not stored")

 return response_text, response_id

@@ -460,9 +432,7 @@ async def async_langflow_chat_stream(
 extra_headers: dict = None,
 previous_response_id: str = None,
 ):
-print(
-f"[DEBUG] async_langflow_chat_stream called with user_id: {user_id}, previous_response_id: {previous_response_id}"
-)
+logger.debug("async_langflow_chat_stream called", user_id=user_id, previous_response_id=previous_response_id)

 # Get the specific conversation thread (or create new one)
 conversation_state = get_conversation_thread(user_id, previous_response_id)

@@ -513,6 +483,4 @@ async def async_langflow_chat_stream(
 if response_id:
 conversation_state["last_activity"] = datetime.now()
 store_conversation_thread(user_id, response_id, conversation_state)
-print(
-f"[DEBUG] Stored langflow conversation thread for user {user_id} with response_id: {response_id}"
-)
+logger.debug("Stored langflow conversation thread", user_id=user_id, response_id=response_id)
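The pattern repeated across these hunks is mechanical: each module acquires a module-level logger from the shared helper, and f-string print calls become named events with key-value fields. A minimal sketch of that conversion (the handle_prompt function is illustrative only; get_logger and the field names mirror the diff):

    from utils.logging_config import get_logger

    logger = get_logger(__name__)


    def handle_prompt(prompt: str, user_id: str) -> None:
        # Before: print(f"user ==> {prompt}")
        # After: a structured event whose fields are machine-searchable
        logger.info("User prompt received", prompt=prompt, user_id=user_id)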
@@ -1,5 +1,8 @@
 from starlette.requests import Request
 from starlette.responses import JSONResponse, StreamingResponse
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)


 async def chat_endpoint(request: Request, chat_service, session_manager):

@@ -122,7 +125,7 @@ async def langflow_endpoint(request: Request, chat_service, session_manager):
 import traceback

 traceback.print_exc()
-print(f"[ERROR] Langflow request failed: {str(e)}")
+logger.error("Langflow request failed", error=str(e))
 return JSONResponse(
 {"error": f"Langflow request failed: {str(e)}"}, status_code=500
 )
@@ -1,5 +1,8 @@
 from starlette.requests import Request
 from starlette.responses import JSONResponse, PlainTextResponse
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)


 async def list_connectors(request: Request, connector_service, session_manager):

@@ -10,7 +13,7 @@ async def list_connectors(request: Request, connector_service, session_manager):
 )
 return JSONResponse({"connectors": connector_types})
 except Exception as e:
-print(f"Error listing connectors: {e}")
+logger.error("Error listing connectors", error=str(e))
 return JSONResponse({"error": str(e)}, status_code=500)


@@ -21,13 +24,11 @@ async def connector_sync(request: Request, connector_service, session_manager):
 max_files = data.get("max_files")

 try:
-print(
-f"[DEBUG] Starting connector sync for connector_type={connector_type}, max_files={max_files}"
-)
+logger.debug("Starting connector sync", connector_type=connector_type, max_files=max_files)

 user = request.state.user
 jwt_token = request.state.jwt_token
-print(f"[DEBUG] User: {user.user_id}")
+logger.debug("User authenticated", user_id=user.user_id)

 # Get all active connections for this connector type and user
 connections = await connector_service.connection_manager.list_connections(

@@ -44,14 +45,12 @@ async def connector_sync(request: Request, connector_service, session_manager):
 # Start sync tasks for all active connections
 task_ids = []
 for connection in active_connections:
-print(
-f"[DEBUG] About to call sync_connector_files for connection {connection.connection_id}"
-)
+logger.debug("About to call sync_connector_files for connection", connection_id=connection.connection_id)
 task_id = await connector_service.sync_connector_files(
 connection.connection_id, user.user_id, max_files, jwt_token=jwt_token
 )
 task_ids.append(task_id)
-print(f"[DEBUG] Got task_id: {task_id}")
+logger.debug("Got task ID", task_id=task_id)

 return JSONResponse(
 {

@@ -68,7 +67,7 @@ async def connector_sync(request: Request, connector_service, session_manager):
 import traceback

 error_msg = f"[ERROR] Connector sync failed: {str(e)}"
-print(error_msg, file=sys.stderr, flush=True)
+logger.error(error_msg)
 traceback.print_exc(file=sys.stderr)
 sys.stderr.flush()


@@ -156,7 +155,7 @@ async def connector_webhook(request: Request, connector_service, session_manager
 payload["_headers"] = headers
 payload["_method"] = request.method

-print(f"[WEBHOOK] {connector_type} notification received")
+logger.info("Webhook notification received", connector_type=connector_type)

 # Extract channel/subscription ID using connector-specific method
 try:

@@ -168,7 +167,7 @@ async def connector_webhook(request: Request, connector_service, session_manager
 channel_id = None

 if not channel_id:
-print(f"[WEBHOOK] No channel ID found in {connector_type} webhook")
+logger.warning("No channel ID found in webhook", connector_type=connector_type)
 return JSONResponse({"status": "ignored", "reason": "no_channel_id"})

 # Find the specific connection for this webhook

@@ -178,9 +177,7 @@ async def connector_webhook(request: Request, connector_service, session_manager
 )
 )
 if not connection or not connection.is_active:
-print(
-f"[WEBHOOK] Unknown channel {channel_id} - no cleanup attempted (will auto-expire)"
-)
+logger.info("Unknown webhook channel, will auto-expire", channel_id=channel_id)
 return JSONResponse(
 {"status": "ignored_unknown_channel", "channel_id": channel_id}
 )

@@ -191,9 +188,7 @@ async def connector_webhook(request: Request, connector_service, session_manager
 # Get the connector instance
 connector = await connector_service._get_connector(connection.connection_id)
 if not connector:
-print(
-f"[WEBHOOK] Could not get connector for connection {connection.connection_id}"
-)
+logger.error("Could not get connector for connection", connection_id=connection.connection_id)
 return JSONResponse(
 {"status": "error", "reason": "connector_not_found"}
 )

@@ -202,9 +197,7 @@ async def connector_webhook(request: Request, connector_service, session_manager
 affected_files = await connector.handle_webhook(payload)

 if affected_files:
-print(
-f"[WEBHOOK] Connection {connection.connection_id}: {len(affected_files)} files affected"
-)
+logger.info("Webhook connection files affected", connection_id=connection.connection_id, affected_count=len(affected_files))

 # Generate JWT token for the user (needed for OpenSearch authentication)
 user = session_manager.get_user(connection.user_id)

@@ -228,9 +221,7 @@ async def connector_webhook(request: Request, connector_service, session_manager
 }
 else:
 # No specific files identified - just log the webhook
-print(
-f"[WEBHOOK] Connection {connection.connection_id}: general change detected, no specific files to sync"
-)
+logger.info("Webhook general change detected, no specific files", connection_id=connection.connection_id)

 result = {
 "connection_id": connection.connection_id,

@@ -248,9 +239,7 @@ async def connector_webhook(request: Request, connector_service, session_manager
 )

 except Exception as e:
-print(
-f"[ERROR] Failed to process webhook for connection {connection.connection_id}: {e}"
-)
+logger.error("Failed to process webhook for connection", connection_id=connection.connection_id, error=str(e))
 import traceback

 traceback.print_exc()

@@ -267,7 +256,7 @@ async def connector_webhook(request: Request, connector_service, session_manager
 except Exception as e:
 import traceback

-print(f"[ERROR] Webhook processing failed: {str(e)}")
+logger.error("Webhook processing failed", error=str(e))
 traceback.print_exc()
 return JSONResponse(
 {"error": f"Webhook processing failed: {str(e)}"}, status_code=500
@@ -3,6 +3,9 @@ from starlette.responses import JSONResponse
 import uuid
 import json
 from datetime import datetime
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)


 async def create_knowledge_filter(

@@ -392,17 +395,15 @@ async def knowledge_filter_webhook(
 # Get the webhook payload
 payload = await request.json()

-print(
-f"[WEBHOOK] Knowledge filter webhook received for filter {filter_id}, subscription {subscription_id}"
-)
-print(f"[WEBHOOK] Payload: {json.dumps(payload, indent=2)}")
+logger.info("Knowledge filter webhook received",
+filter_id=filter_id,
+subscription_id=subscription_id,
+payload_size=len(str(payload)))

 # Extract findings from the payload
 findings = payload.get("findings", [])
 if not findings:
-print(
-f"[WEBHOOK] No findings in webhook payload for subscription {subscription_id}"
-)
+logger.info("No findings in webhook payload", subscription_id=subscription_id)
 return JSONResponse({"status": "no_findings"})

 # Process the findings - these are the documents that matched the knowledge filter

@@ -419,13 +420,14 @@ async def knowledge_filter_webhook(
 )

 # Log the matched documents
-print(
-f"[WEBHOOK] Knowledge filter {filter_id} matched {len(matched_documents)} documents"
-)
+logger.info("Knowledge filter matched documents",
+filter_id=filter_id,
+matched_count=len(matched_documents))
 for doc in matched_documents:
-print(
-f"[WEBHOOK] Matched document: {doc['document_id']} from index {doc['index']}"
-)
+logger.debug("Matched document",
+document_id=doc['document_id'],
+index=doc['index'],
+score=doc.get('score'))

 # Here you could add additional processing:
 # - Send notifications to external webhooks

@@ -444,7 +446,10 @@ async def knowledge_filter_webhook(
 )

 except Exception as e:
-print(f"[ERROR] Failed to process knowledge filter webhook: {str(e)}")
+logger.error("Failed to process knowledge filter webhook",
+filter_id=filter_id,
+subscription_id=subscription_id,
+error=str(e))
 import traceback

 traceback.print_exc()
@@ -1,5 +1,8 @@
 from starlette.requests import Request
 from starlette.responses import JSONResponse
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)


 async def search(request: Request, search_service, session_manager):

@@ -20,9 +23,14 @@ async def search(request: Request, search_service, session_manager):
 # Extract JWT token from auth middleware
 jwt_token = request.state.jwt_token

-print(
-f"[DEBUG] search API: user={user}, user_id={user.user_id if user else None}, jwt_token={'None' if jwt_token is None else 'present'}"
-)
+logger.debug("Search API request",
+user=str(user),
+user_id=user.user_id if user else None,
+has_jwt_token=jwt_token is not None,
+query=query,
+filters=filters,
+limit=limit,
+score_threshold=score_threshold)

 result = await search_service.search(
 query,
@@ -3,6 +3,9 @@ import requests
 import asyncio
 import time
 from dotenv import load_dotenv
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)
 from opensearchpy import AsyncOpenSearch
 from opensearchpy._async.http_aiohttp import AIOHttpConnection
 from docling.document_converter import DocumentConverter

@@ -34,9 +37,7 @@ GOOGLE_OAUTH_CLIENT_SECRET = os.getenv("GOOGLE_OAUTH_CLIENT_SECRET")
 def is_no_auth_mode():
 """Check if we're running in no-auth mode (OAuth credentials missing)"""
 result = not (GOOGLE_OAUTH_CLIENT_ID and GOOGLE_OAUTH_CLIENT_SECRET)
-print(
-f"[DEBUG] is_no_auth_mode() = {result}, CLIENT_ID={GOOGLE_OAUTH_CLIENT_ID is not None}, CLIENT_SECRET={GOOGLE_OAUTH_CLIENT_SECRET is not None}"
-)
+logger.debug("Checking auth mode", no_auth_mode=result, has_client_id=GOOGLE_OAUTH_CLIENT_ID is not None, has_client_secret=GOOGLE_OAUTH_CLIENT_SECRET is not None)
 return result


@@ -95,17 +96,15 @@ async def generate_langflow_api_key():

 # If key already provided via env, do not attempt generation
 if LANGFLOW_KEY:
-print("[INFO] Using LANGFLOW_KEY from environment; skipping generation")
+logger.info("Using LANGFLOW_KEY from environment, skipping generation")
 return LANGFLOW_KEY

 if not LANGFLOW_SUPERUSER or not LANGFLOW_SUPERUSER_PASSWORD:
-print(
-"[WARNING] LANGFLOW_SUPERUSER and LANGFLOW_SUPERUSER_PASSWORD not set, skipping API key generation"
-)
+logger.warning("LANGFLOW_SUPERUSER and LANGFLOW_SUPERUSER_PASSWORD not set, skipping API key generation")
 return None

 try:
-print("[INFO] Generating Langflow API key using superuser credentials...")
+logger.info("Generating Langflow API key using superuser credentials")
 max_attempts = int(os.getenv("LANGFLOW_KEY_RETRIES", "15"))
 delay_seconds = float(os.getenv("LANGFLOW_KEY_RETRY_DELAY", "2.0"))


@@ -143,28 +142,24 @@ async def generate_langflow_api_key():
 raise KeyError("api_key")

 LANGFLOW_KEY = api_key
-print(
-f"[INFO] Successfully generated Langflow API key: {api_key[:8]}..."
-)
+logger.info("Successfully generated Langflow API key", api_key_preview=api_key[:8])
 return api_key
 except (requests.exceptions.RequestException, KeyError) as e:
 last_error = e
-print(
-f"[WARN] Attempt {attempt}/{max_attempts} to generate Langflow API key failed: {e}"
-)
+logger.warning("Attempt to generate Langflow API key failed", attempt=attempt, max_attempts=max_attempts, error=str(e))
 if attempt < max_attempts:
 time.sleep(delay_seconds)
 else:
 raise

 except requests.exceptions.RequestException as e:
-print(f"[ERROR] Failed to generate Langflow API key: {e}")
+logger.error("Failed to generate Langflow API key", error=str(e))
 return None
 except KeyError as e:
-print(f"[ERROR] Unexpected response format from Langflow: missing {e}")
+logger.error("Unexpected response format from Langflow", missing_field=str(e))
 return None
 except Exception as e:
-print(f"[ERROR] Unexpected error generating Langflow API key: {e}")
+logger.error("Unexpected error generating Langflow API key", error=str(e))
 return None


@@ -198,12 +193,10 @@ class AppClients:
 base_url=f"{LANGFLOW_URL}/api/v1", api_key=LANGFLOW_KEY
 )
 except Exception as e:
-print(f"[WARNING] Failed to initialize Langflow client: {e}")
+logger.warning("Failed to initialize Langflow client", error=str(e))
 self.langflow_client = None
 if self.langflow_client is None:
-print(
-"[WARNING] No Langflow client initialized yet; will attempt later on first use"
-)
+logger.warning("No Langflow client initialized yet, will attempt later on first use")

 # Initialize patched OpenAI client
 self.patched_async_client = patch_openai_with_mcp(AsyncOpenAI())

@@ -224,9 +217,9 @@ class AppClients:
 self.langflow_client = AsyncOpenAI(
 base_url=f"{LANGFLOW_URL}/api/v1", api_key=LANGFLOW_KEY
 )
-print("[INFO] Langflow client initialized on-demand")
+logger.info("Langflow client initialized on-demand")
 except Exception as e:
-print(f"[ERROR] Failed to initialize Langflow client on-demand: {e}")
+logger.error("Failed to initialize Langflow client on-demand", error=str(e))
 self.langflow_client = None
 return self.langflow_client
@@ -6,6 +6,9 @@ from typing import Dict, List, Any, Optional
 from datetime import datetime
 from dataclasses import dataclass, asdict
 from pathlib import Path
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)

 from .base import BaseConnector
 from .google_drive import GoogleDriveConnector

@@ -318,21 +321,17 @@ class ConnectionManager:
 if connection_config.config.get(
 "webhook_channel_id"
 ) or connection_config.config.get("subscription_id"):
-print(
-f"[WEBHOOK] Subscription already exists for connection {connection_id}"
-)
+logger.info("Webhook subscription already exists", connection_id=connection_id)
 return

 # Check if webhook URL is configured
 webhook_url = connection_config.config.get("webhook_url")
 if not webhook_url:
-print(
-f"[WEBHOOK] No webhook URL configured for connection {connection_id}, skipping subscription setup"
-)
+logger.info("No webhook URL configured, skipping subscription setup", connection_id=connection_id)
 return

 try:
-print(f"[WEBHOOK] Setting up subscription for connection {connection_id}")
+logger.info("Setting up webhook subscription", connection_id=connection_id)
 subscription_id = await connector.setup_subscription()

 # Store the subscription and resource IDs in connection config

@@ -346,14 +345,10 @@ class ConnectionManager:
 # Save updated connection config
 await self.save_connections()

-print(
-f"[WEBHOOK] Successfully set up subscription {subscription_id} for connection {connection_id}"
-)
+logger.info("Successfully set up webhook subscription", connection_id=connection_id, subscription_id=subscription_id)

 except Exception as e:
-print(
-f"[ERROR] Failed to setup webhook subscription for connection {connection_id}: {e}"
-)
+logger.error("Failed to setup webhook subscription", connection_id=connection_id, error=str(e))
 # Don't fail the entire connection setup if webhook fails

 async def _setup_webhook_for_new_connection(

@@ -361,16 +356,12 @@ class ConnectionManager:
 ):
 """Setup webhook subscription for a newly authenticated connection"""
 try:
-print(
-f"[WEBHOOK] Setting up subscription for newly authenticated connection {connection_id}"
-)
+logger.info("Setting up subscription for newly authenticated connection", connection_id=connection_id)

 # Create and authenticate connector
 connector = self._create_connector(connection_config)
 if not await connector.authenticate():
-print(
-f"[ERROR] Failed to authenticate connector for webhook setup: {connection_id}"
-)
+logger.error("Failed to authenticate connector for webhook setup", connection_id=connection_id)
 return

 # Setup subscription

@@ -385,12 +376,8 @@ class ConnectionManager:
 # Save updated connection config
 await self.save_connections()

-print(
-f"[WEBHOOK] Successfully set up subscription {subscription_id} for connection {connection_id}"
-)
+logger.info("Successfully set up webhook subscription", connection_id=connection_id, subscription_id=subscription_id)

 except Exception as e:
-print(
-f"[ERROR] Failed to setup webhook subscription for new connection {connection_id}: {e}"
-)
+logger.error("Failed to setup webhook subscription for new connection", connection_id=connection_id, error=str(e))
 # Don't fail the connection setup if webhook fails
@@ -7,6 +7,9 @@ from typing import Dict, List, Any, Optional
 from googleapiclient.discovery import build
 from googleapiclient.errors import HttpError
 from googleapiclient.http import MediaIoBaseDownload
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)

 from ..base import BaseConnector, ConnectorDocument, DocumentACL
 from .oauth import GoogleDriveOAuth

@@ -20,9 +23,7 @@ def get_worker_drive_service(client_id: str, client_secret: str, token_file: str
 """Get or create a Google Drive service instance for this worker process"""
 global _worker_drive_service
 if _worker_drive_service is None:
-print(
-f"🔧 Initializing Google Drive service in worker process (PID: {os.getpid()})"
-)
+logger.info("Initializing Google Drive service in worker process", pid=os.getpid())

 # Create OAuth instance and load credentials in worker
 from .oauth import GoogleDriveOAuth

@@ -39,9 +40,7 @@ def get_worker_drive_service(client_id: str, client_secret: str, token_file: str
 try:
 loop.run_until_complete(oauth.load_credentials())
 _worker_drive_service = oauth.get_service()
-print(
-f"✅ Google Drive service ready in worker process (PID: {os.getpid()})"
-)
+logger.info("Google Drive service ready in worker process", pid=os.getpid())
 finally:
 loop.close()


@@ -215,7 +214,7 @@ class GoogleDriveConnector(BaseConnector):
 return True
 return False
 except Exception as e:
-print(f"Authentication failed: {e}")
+logger.error("Authentication failed", error=str(e))
 return False

 async def setup_subscription(self) -> str:

@@ -258,7 +257,7 @@ class GoogleDriveConnector(BaseConnector):
 return channel_id

 except HttpError as e:
-print(f"Failed to set up subscription: {e}")
+logger.error("Failed to set up subscription", error=str(e))
 raise

 def _get_start_page_token(self) -> str:

@@ -340,7 +339,7 @@ class GoogleDriveConnector(BaseConnector):
 return {"files": files, "nextPageToken": results.get("nextPageToken")}

 except HttpError as e:
-print(f"Failed to list files: {e}")
+logger.error("Failed to list files", error=str(e))
 raise

 async def get_file_content(self, file_id: str) -> ConnectorDocument:

@@ -397,7 +396,7 @@ class GoogleDriveConnector(BaseConnector):
 )

 except HttpError as e:
-print(f"Failed to get file content: {e}")
+logger.error("Failed to get file content", error=str(e))
 raise

 async def _download_file_content(

@@ -477,19 +476,17 @@ class GoogleDriveConnector(BaseConnector):
 resource_state = headers.get("x-goog-resource-state")

 if not channel_id:
-print("[WEBHOOK] No channel ID found in Google Drive webhook")
+logger.warning("No channel ID found in Google Drive webhook")
 return []

 # Check if this webhook belongs to this connection
 if self.webhook_channel_id != channel_id:
-print(
-f"[WEBHOOK] Channel ID mismatch: expected {self.webhook_channel_id}, got {channel_id}"
-)
+logger.warning("Channel ID mismatch", expected=self.webhook_channel_id, received=channel_id)
 return []

 # Only process certain states (ignore 'sync' which is just a ping)
 if resource_state not in ["exists", "not_exists", "change"]:
-print(f"[WEBHOOK] Ignoring resource state: {resource_state}")
+logger.debug("Ignoring resource state", state=resource_state)
 return []

 try:

@@ -508,10 +505,10 @@ class GoogleDriveConnector(BaseConnector):
 page_token = query_params.get("pageToken", [None])[0]

 if not page_token:
-print("[WEBHOOK] No page token found, cannot identify specific changes")
+logger.warning("No page token found, cannot identify specific changes")
 return []

-print(f"[WEBHOOK] Getting changes since page token: {page_token}")
+logger.info("Getting changes since page token", page_token=page_token)

 # Get list of changes since the page token
 changes = (

@@ -536,23 +533,19 @@ class GoogleDriveConnector(BaseConnector):
 is_trashed = file_info.get("trashed", False)

 if not is_trashed and mime_type in self.SUPPORTED_MIMETYPES:
-print(
-f"[WEBHOOK] File changed: {file_info.get('name', 'Unknown')} ({file_id})"
-)
+logger.info("File changed", filename=file_info.get('name', 'Unknown'), file_id=file_id)
 affected_files.append(file_id)
 elif is_trashed:
-print(
-f"[WEBHOOK] File deleted/trashed: {file_info.get('name', 'Unknown')} ({file_id})"
-)
+logger.info("File deleted/trashed", filename=file_info.get('name', 'Unknown'), file_id=file_id)
 # TODO: Handle file deletion (remove from index)
 else:
-print(f"[WEBHOOK] Ignoring unsupported file type: {mime_type}")
+logger.debug("Ignoring unsupported file type", mime_type=mime_type)

-print(f"[WEBHOOK] Found {len(affected_files)} affected supported files")
+logger.info("Found affected supported files", count=len(affected_files))
 return affected_files

 except HttpError as e:
-print(f"Failed to handle webhook: {e}")
+logger.error("Failed to handle webhook", error=str(e))
 return []

 async def cleanup_subscription(self, subscription_id: str) -> bool:

@@ -574,5 +567,5 @@ class GoogleDriveConnector(BaseConnector):
 self.service.channels().stop(body=body).execute()
 return True
 except HttpError as e:
-print(f"Failed to cleanup subscription: {e}")
+logger.error("Failed to cleanup subscription", error=str(e))
 return False
src/main.py (69 changed lines)

@@ -6,6 +6,11 @@ if __name__ == "__main__" and len(sys.argv) > 1 and sys.argv[1] == "--tui":
 run_tui()
 sys.exit(0)

+# Configure structured logging early
+from utils.logging_config import configure_from_env, get_logger
+configure_from_env()
+logger = get_logger(__name__)
+
 import asyncio
 import atexit
 import multiprocessing
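configure_from_env() picks up its settings from the environment (see src/utils/logging_config.py later in this diff): LOG_LEVEL, LOG_FORMAT, and SERVICE_NAME. A hedged sketch of forcing JSON output before startup, assuming the defaults defined in that helper; the explicit environment assignments are illustrative only:

    import os

    # Illustrative overrides; variable names come from configure_from_env()
    os.environ["LOG_LEVEL"] = "DEBUG"      # default "INFO"
    os.environ["LOG_FORMAT"] = "json"      # any other value keeps the pretty console renderer
    os.environ["SERVICE_NAME"] = "openrag"

    from utils.logging_config import configure_from_env, get_logger

    configure_from_env()
    get_logger(__name__).info("logging configured", mode="json")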
@@ -54,8 +59,7 @@ from api import (
 settings,
 )

-print("CUDA available:", torch.cuda.is_available())
-print("CUDA version PyTorch was built with:", torch.version.cuda)
+logger.info("CUDA device information", cuda_available=torch.cuda.is_available(), cuda_version=torch.version.cuda)


 async def wait_for_opensearch():

@@ -66,12 +70,10 @@ async def wait_for_opensearch():
 for attempt in range(max_retries):
 try:
 await clients.opensearch.info()
-print("OpenSearch is ready!")
+logger.info("OpenSearch is ready")
 return
 except Exception as e:
-print(
-f"Attempt {attempt + 1}/{max_retries}: OpenSearch not ready yet ({e})"
-)
+logger.warning("OpenSearch not ready yet", attempt=attempt + 1, max_retries=max_retries, error=str(e))
 if attempt < max_retries - 1:
 await asyncio.sleep(retry_delay)
 else:

@@ -93,10 +95,9 @@ async def configure_alerting_security():

 # Use admin client (clients.opensearch uses admin credentials)
 response = await clients.opensearch.cluster.put_settings(body=alerting_settings)
-print("Alerting security settings configured successfully")
-print(f"Response: {response}")
+logger.info("Alerting security settings configured successfully", response=response)
 except Exception as e:
-print(f"Warning: Failed to configure alerting security settings: {e}")
+logger.warning("Failed to configure alerting security settings", error=str(e))
 # Don't fail startup if alerting config fails


@@ -107,9 +108,9 @@ async def init_index():
 # Create documents index
 if not await clients.opensearch.indices.exists(index=INDEX_NAME):
 await clients.opensearch.indices.create(index=INDEX_NAME, body=INDEX_BODY)
-print(f"Created index '{INDEX_NAME}'")
+logger.info("Created OpenSearch index", index_name=INDEX_NAME)
 else:
-print(f"Index '{INDEX_NAME}' already exists, skipping creation.")
+logger.info("Index already exists, skipping creation", index_name=INDEX_NAME)

 # Create knowledge filters index
 knowledge_filter_index_name = "knowledge_filters"

@@ -134,11 +135,9 @@ async def init_index():
 await clients.opensearch.indices.create(
 index=knowledge_filter_index_name, body=knowledge_filter_index_body
 )
-print(f"Created index '{knowledge_filter_index_name}'")
+logger.info("Created knowledge filters index", index_name=knowledge_filter_index_name)
 else:
-print(
-f"Index '{knowledge_filter_index_name}' already exists, skipping creation."
-)
+logger.info("Knowledge filters index already exists, skipping creation", index_name=knowledge_filter_index_name)

 # Configure alerting plugin security settings
 await configure_alerting_security()

@@ -178,24 +177,22 @@ def generate_jwt_keys():
 capture_output=True,
 )

-print("Generated RSA keys for JWT signing")
+logger.info("Generated RSA keys for JWT signing")
 except subprocess.CalledProcessError as e:
-print(f"Failed to generate RSA keys: {e}")
+logger.error("Failed to generate RSA keys", error=str(e))
 raise
 else:
-print("RSA keys already exist, skipping generation")
+logger.info("RSA keys already exist, skipping generation")


 async def init_index_when_ready():
 """Initialize OpenSearch index when it becomes available"""
 try:
 await init_index()
-print("OpenSearch index initialization completed successfully")
+logger.info("OpenSearch index initialization completed successfully")
 except Exception as e:
-print(f"OpenSearch index initialization failed: {e}")
-print(
-"OIDC endpoints will still work, but document operations may fail until OpenSearch is ready"
-)
+logger.error("OpenSearch index initialization failed", error=str(e))
+logger.warning("OIDC endpoints will still work, but document operations may fail until OpenSearch is ready")


 async def initialize_services():

@@ -242,13 +239,11 @@ async def initialize_services():
 try:
 await connector_service.initialize()
 loaded_count = len(connector_service.connection_manager.connections)
-print(
-f"[CONNECTORS] Loaded {loaded_count} persisted connection(s) on startup"
-)
+logger.info("Loaded persisted connector connections on startup", loaded_count=loaded_count)
 except Exception as e:
-print(f"[WARNING] Failed to load persisted connections on startup: {e}")
+logger.warning("Failed to load persisted connections on startup", error=str(e))
 else:
-print(f"[CONNECTORS] Skipping connection loading in no-auth mode")
+logger.info("Skipping connector loading in no-auth mode")

 return {
 "document_service": document_service,

@@ -661,13 +656,13 @@ async def startup():
 def cleanup():
 """Cleanup on application shutdown"""
 # Cleanup process pools only (webhooks handled by Starlette shutdown)
-print("[CLEANUP] Shutting down...")
+logger.info("Application shutting down")
 pass


 async def cleanup_subscriptions_proper(services):
 """Cancel all active webhook subscriptions"""
-print("[CLEANUP] Cancelling active webhook subscriptions...")
+logger.info("Cancelling active webhook subscriptions")

 try:
 connector_service = services["connector_service"]

@@ -683,25 +678,21 @@ async def cleanup_subscriptions_proper(services):

 for connection in active_connections:
 try:
-print(
-f"[CLEANUP] Cancelling subscription for connection {connection.connection_id}"
-)
+logger.info("Cancelling subscription for connection", connection_id=connection.connection_id)
 connector = await connector_service.get_connector(
 connection.connection_id
 )
 if connector:
 subscription_id = connection.config.get("webhook_channel_id")
 await connector.cleanup_subscription(subscription_id)
-print(f"[CLEANUP] Cancelled subscription {subscription_id}")
+logger.info("Cancelled subscription", subscription_id=subscription_id)
 except Exception as e:
-print(
-f"[ERROR] Failed to cancel subscription for {connection.connection_id}: {e}"
-)
+logger.error("Failed to cancel subscription", connection_id=connection.connection_id, error=str(e))

-print(f"[CLEANUP] Finished cancelling {len(active_connections)} subscriptions")
+logger.info("Finished cancelling subscriptions", subscription_count=len(active_connections))

 except Exception as e:
-print(f"[ERROR] Failed to cleanup subscriptions: {e}")
+logger.error("Failed to cleanup subscriptions", error=str(e))


 if __name__ == "__main__":
@@ -2,6 +2,9 @@ from config.settings import clients, LANGFLOW_URL, FLOW_ID
 from agent import async_chat, async_langflow, async_chat_stream, async_langflow_stream
 from auth_context import set_auth_context
 import json
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)


 class ChatService:

@@ -108,9 +111,7 @@ class ChatService:

 # Pass the complete filter expression as a single header to Langflow (only if we have something to send)
 if filter_expression:
-print(
-f"Sending OpenRAG query filter to Langflow: {json.dumps(filter_expression, indent=2)}"
-)
+logger.info("Sending OpenRAG query filter to Langflow", filter_expression=filter_expression)
 extra_headers["X-LANGFLOW-GLOBAL-VAR-OPENRAG-QUERY-FILTER"] = json.dumps(
 filter_expression
 )

@@ -200,9 +201,7 @@ class ChatService:
 return {"error": "User ID is required", "conversations": []}

 conversations_dict = get_user_conversations(user_id)
-print(
-f"[DEBUG] get_chat_history for user {user_id}: found {len(conversations_dict)} conversations"
-)
+logger.debug("Getting chat history for user", user_id=user_id, conversation_count=len(conversations_dict))

 # Convert conversations dict to list format with metadata
 conversations = []
@@ -8,6 +8,9 @@ from docling_core.types.io import DocumentStream
 from typing import List
 import openai
 import tiktoken
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)

 from config.settings import clients, INDEX_NAME, EMBED_MODEL
 from utils.document_processing import extract_relevant, process_document_sync

@@ -91,7 +94,7 @@ class DocumentService:
 def _recreate_process_pool(self):
 """Recreate the process pool if it's broken"""
 if self._process_pool_broken and self.process_pool:
-print("[WARNING] Attempting to recreate broken process pool...")
+logger.warning("Attempting to recreate broken process pool")
 try:
 # Shutdown the old pool
 self.process_pool.shutdown(wait=False)

@@ -102,10 +105,10 @@ class DocumentService:

 self.process_pool = ProcessPoolExecutor(max_workers=MAX_WORKERS)
 self._process_pool_broken = False
-print(f"[INFO] Process pool recreated with {MAX_WORKERS} workers")
+logger.info("Process pool recreated", worker_count=MAX_WORKERS)
 return True
 except Exception as e:
-print(f"[ERROR] Failed to recreate process pool: {e}")
+logger.error("Failed to recreate process pool", error=str(e))
 return False
 return False


@@ -193,8 +196,8 @@ class DocumentService:
 index=INDEX_NAME, id=chunk_id, body=chunk_doc
 )
 except Exception as e:
-print(f"[ERROR] OpenSearch indexing failed for chunk {chunk_id}: {e}")
-print(f"[ERROR] Chunk document: {chunk_doc}")
+logger.error("OpenSearch indexing failed for chunk", chunk_id=chunk_id, error=str(e))
+logger.error("Chunk document details", chunk_doc=chunk_doc)
 raise
 return {"status": "indexed", "id": file_hash}


@@ -229,9 +232,7 @@ class DocumentService:
 try:
 exists = await opensearch_client.exists(index=INDEX_NAME, id=file_hash)
 except Exception as e:
-print(
-f"[ERROR] OpenSearch exists check failed for document {file_hash}: {e}"
-)
+logger.error("OpenSearch exists check failed", file_hash=file_hash, error=str(e))
 raise
 if exists:
 return {"status": "unchanged", "id": file_hash}

@@ -371,10 +372,8 @@ class DocumentService:
 index=INDEX_NAME, id=chunk_id, body=chunk_doc
 )
 except Exception as e:
-print(
-f"[ERROR] OpenSearch indexing failed for batch chunk {chunk_id}: {e}"
-)
-print(f"[ERROR] Chunk document: {chunk_doc}")
+logger.error("OpenSearch indexing failed for batch chunk", chunk_id=chunk_id, error=str(e))
+logger.error("Chunk document details", chunk_doc=chunk_doc)
 raise

 result = {"status": "indexed", "id": slim_doc["id"]}

@@ -389,29 +388,25 @@ class DocumentService:
 from concurrent.futures import BrokenExecutor

 if isinstance(e, BrokenExecutor):
-print(f"[CRITICAL] Process pool broken while processing {file_path}")
-print(f"[INFO] This usually indicates a worker process crashed")
-print(
-f"[INFO] You should see detailed crash logs above from the worker process"
-)
+logger.error("Process pool broken while processing file", file_path=file_path)
+logger.info("Worker process likely crashed")
+logger.info("You should see detailed crash logs above from the worker process")

 # Mark pool as broken for potential recreation
 self._process_pool_broken = True

 # Attempt to recreate the pool for future operations
 if self._recreate_process_pool():
-print(f"[INFO] Process pool successfully recreated")
+logger.info("Process pool successfully recreated")
 else:
-print(
-f"[WARNING] Failed to recreate process pool - future operations may fail"
-)
+logger.warning("Failed to recreate process pool - future operations may fail")

 file_task.error = f"Worker process crashed: {str(e)}"
 else:
-print(f"[ERROR] Failed to process file {file_path}: {e}")
+logger.error("Failed to process file", file_path=file_path, error=str(e))
 file_task.error = str(e)

-print(f"[ERROR] Full traceback:")
+logger.error("Full traceback available")
 traceback.print_exc()
 file_task.status = TaskStatus.FAILED
 upload_task.failed_files += 1
@@ -3,6 +3,9 @@ import json
 from typing import Any, Dict, Optional, List
 from datetime import datetime
 from config.settings import clients
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)


 class MonitorService:

@@ -192,7 +195,7 @@ class MonitorService:
 return monitors

 except Exception as e:
-print(f"Error listing monitors for user {user_id}: {e}")
+logger.error("Error listing monitors for user", user_id=user_id, error=str(e))
 return []

 async def list_monitors_for_filter(

@@ -233,7 +236,7 @@ class MonitorService:
 return monitors

 except Exception as e:
-print(f"Error listing monitors for filter {filter_id}: {e}")
+logger.error("Error listing monitors for filter", filter_id=filter_id, error=str(e))
 return []

 async def _get_or_create_webhook_destination(
@@ -2,6 +2,9 @@ from typing import Any, Dict, Optional
 from agentd.tool_decorator import tool
 from config.settings import clients, INDEX_NAME, EMBED_MODEL
 from auth_context import get_auth_context
+from utils.logging_config import get_logger
+
+logger = get_logger(__name__)


 class SearchService:

@@ -135,13 +138,9 @@ class SearchService:
 search_body["min_score"] = score_threshold

 # Authentication required - DLS will handle document filtering automatically
-print(
-f"[DEBUG] search_service: user_id={user_id}, jwt_token={'None' if jwt_token is None else 'present'}"
-)
+logger.debug("search_service authentication info", user_id=user_id, has_jwt_token=jwt_token is not None)
 if not user_id:
-print(
-f"[DEBUG] search_service: user_id is None/empty, returning auth error"
-)
+logger.debug("search_service: user_id is None/empty, returning auth error")
 return {"results": [], "error": "Authentication required"}

 # Get user's OpenSearch client with JWT for OIDC auth through session manager

@@ -152,8 +151,7 @@ class SearchService:
 try:
 results = await opensearch_client.search(index=INDEX_NAME, body=search_body)
 except Exception as e:
-print(f"[ERROR] OpenSearch query failed: {e}")
-print(f"[ERROR] Search body: {search_body}")
+logger.error("OpenSearch query failed", error=str(e), search_body=search_body)
 # Re-raise the exception so the API returns the error to frontend
 raise
@@ -162,6 +162,17 @@ class ConfigScreen(Screen):
 yield Label("Google OAuth Client ID")
 # Where to create Google OAuth credentials (helper above the box)
 yield Static(Text("Create credentials: https://console.cloud.google.com/apis/credentials", style="dim"), classes="helper-text")
+# Callback URL guidance for Google OAuth
+yield Static(
+Text(
+"Important: add an Authorized redirect URI to your Google OAuth app(s):\n"
+" - Local: http://localhost:3000/auth/callback\n"
+" - Prod: https://your-domain.com/auth/callback\n"
+"If you use separate apps for login and connectors, add this URL to BOTH.",
+style="dim"
+),
+classes="helper-text"
+)
 current_value = getattr(self.env_manager.config, "google_oauth_client_id", "")
 input_widget = Input(
 placeholder="xxx.apps.googleusercontent.com",

@@ -189,6 +200,17 @@ class ConfigScreen(Screen):
 yield Label("Microsoft Graph Client ID")
 # Where to create Microsoft app registrations (helper above the box)
 yield Static(Text("Create app: https://portal.azure.com/#view/Microsoft_AAD_RegisteredApps/ApplicationsListBlade", style="dim"), classes="helper-text")
+# Callback URL guidance for Microsoft OAuth
+yield Static(
+Text(
+"Important: configure a Web redirect URI for your Microsoft app(s):\n"
+" - Local: http://localhost:3000/auth/callback\n"
+" - Prod: https://your-domain.com/auth/callback\n"
+"If you use separate apps for login and connectors, add this URI to BOTH.",
+style="dim"
+),
+classes="helper-text"
+)
 current_value = getattr(self.env_manager.config, "microsoft_graph_oauth_client_id", "")
 input_widget = Input(
 placeholder="",
src/utils/logging_config.py (new file, 81 lines)

@@ -0,0 +1,81 @@
+import os
+import sys
+from typing import Any, Dict
+import structlog
+from structlog import processors
+
+
+def configure_logging(
+log_level: str = "INFO",
+json_logs: bool = False,
+include_timestamps: bool = True,
+service_name: str = "openrag"
+) -> None:
+"""Configure structlog for the application."""
+
+# Convert string log level to actual level
+level = getattr(structlog.stdlib.logging, log_level.upper(), structlog.stdlib.logging.INFO)
+
+# Base processors
+shared_processors = [
+structlog.contextvars.merge_contextvars,
+structlog.processors.add_log_level,
+structlog.processors.StackInfoRenderer(),
+structlog.dev.set_exc_info,
+]
+
+if include_timestamps:
+shared_processors.append(structlog.processors.TimeStamper(fmt="iso"))
+
+# Add service name to all logs
+shared_processors.append(
+structlog.processors.CallsiteParameterAdder(
+parameters=[structlog.processors.CallsiteParameter.FUNC_NAME]
+)
+)
+
+# Console output configuration
+if json_logs or os.getenv("LOG_FORMAT", "").lower() == "json":
+# JSON output for production/containers
+shared_processors.append(structlog.processors.JSONRenderer())
+console_renderer = structlog.processors.JSONRenderer()
+else:
+# Pretty colored output for development
+console_renderer = structlog.dev.ConsoleRenderer(
+colors=sys.stderr.isatty(),
+exception_formatter=structlog.dev.plain_traceback,
+)
+
+# Configure structlog
+structlog.configure(
+processors=shared_processors + [console_renderer],
+wrapper_class=structlog.make_filtering_bound_logger(level),
+context_class=dict,
+logger_factory=structlog.WriteLoggerFactory(sys.stderr),
+cache_logger_on_first_use=True,
+)
+
+# Add global context
+structlog.contextvars.clear_contextvars()
+structlog.contextvars.bind_contextvars(service=service_name)
+
+
+def get_logger(name: str = None) -> structlog.BoundLogger:
+"""Get a configured logger instance."""
+if name:
+return structlog.get_logger(name)
+return structlog.get_logger()
+
+
+# Convenience function to configure logging from environment
+def configure_from_env() -> None:
+"""Configure logging from environment variables."""
+log_level = os.getenv("LOG_LEVEL", "INFO")
+json_logs = os.getenv("LOG_FORMAT", "").lower() == "json"
+service_name = os.getenv("SERVICE_NAME", "openrag")
+
+configure_logging(
+log_level=log_level,
+json_logs=json_logs,
+service_name=service_name
+)
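For reference, a small standalone sketch of how this helper is meant to be consumed; the request_id field is invented purely for illustration, everything else mirrors names defined in the new module and the structlog contextvars API:

    import structlog

    from utils.logging_config import configure_logging, get_logger

    # Explicit configuration (configure_from_env() is the env-driven equivalent)
    configure_logging(log_level="DEBUG", json_logs=True, service_name="openrag")

    logger = get_logger(__name__)

    # Fields bound via contextvars ride along on every event in this context
    structlog.contextvars.bind_contextvars(request_id="abc123")  # illustrative field
    logger.info("Response generated", log_prefix="agent", response_id="resp_1")
    logger.debug("Stream chunk received", chunk_count=3)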
uv.lock (generated, 11 changed lines)

@@ -1425,6 +1425,7 @@ dependencies = [
 { name = "python-multipart" },
 { name = "rich" },
 { name = "starlette" },
+{ name = "structlog" },
 { name = "textual" },
 { name = "textual-fspicker" },
 { name = "torch", version = "2.7.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },

@@ -1451,6 +1452,7 @@ requires-dist = [
 { name = "python-multipart", specifier = ">=0.0.20" },
 { name = "rich", specifier = ">=13.0.0" },
 { name = "starlette", specifier = ">=0.47.1" },
+{ name = "structlog", specifier = ">=25.4.0" },
 { name = "textual", specifier = ">=0.45.0" },
 { name = "textual-fspicker", specifier = ">=0.6.0" },
 { name = "torch", marker = "platform_machine != 'x86_64' or sys_platform != 'linux'", specifier = ">=2.7.1" },

@@ -2316,6 +2318,15 @@ wheels = [
 { url = "https://files.pythonhosted.org/packages/82/95/38ef0cd7fa11eaba6a99b3c4f5ac948d8bc6ff199aabd327a29cc000840c/starlette-0.47.1-py3-none-any.whl", hash = "sha256:5e11c9f5c7c3f24959edbf2dffdc01bba860228acf657129467d8a7468591527", size = 72747, upload-time = "2025-06-21T04:03:15.705Z" },
 ]

+[[package]]
+name = "structlog"
+version = "25.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/79/b9/6e672db4fec07349e7a8a8172c1a6ae235c58679ca29c3f86a61b5e59ff3/structlog-25.4.0.tar.gz", hash = "sha256:186cd1b0a8ae762e29417095664adf1d6a31702160a46dacb7796ea82f7409e4", size = 1369138, upload-time = "2025-06-02T08:21:12.971Z" }
+wheels = [
+{ url = "https://files.pythonhosted.org/packages/a0/4a/97ee6973e3a73c74c8120d59829c3861ea52210667ec3e7a16045c62b64d/structlog-25.4.0-py3-none-any.whl", hash = "sha256:fe809ff5c27e557d14e613f45ca441aabda051d119ee5a0102aaba6ce40eed2c", size = 68720, upload-time = "2025-06-02T08:21:11.43Z" },
+]
+
 [[package]]
 name = "sympy"
 version = "1.14.0"