openrag/src/api/langflow_files.py
2025-09-09 12:57:28 -04:00

281 lines
10 KiB
Python

from starlette.requests import Request
from starlette.responses import JSONResponse
from services.langflow_file_service import LangflowFileService
from utils.logging_config import get_logger
# Module-level logger shared by the handlers below; call sites pass their
# context as keyword arguments rather than formatting it into the message.
logger = get_logger(__name__)
async def upload_user_file(
    request: Request, langflow_file_service: LangflowFileService, session_manager
):
    """Upload a single multipart file through the Langflow file service.

    Reads the ``file`` field of the multipart form, buffers its content in
    memory and forwards it to ``langflow_file_service.upload_user_file``
    together with the JWT stored on ``request.state`` (``None`` if absent).

    Args:
        request: Incoming request carrying the multipart form.
        langflow_file_service: Service that performs the upload.
        session_manager: Not used by this handler (presumably kept so all
            handlers in this module share one signature - confirm with the
            route wiring).

    Returns:
        JSONResponse: 201 with the service result on success; 400 when the
        ``file`` field is missing or is not a file part; 500 with the
        stringified exception on any other failure.
    """
    import traceback

    try:
        logger.debug("upload_user_file endpoint called")
        form = await request.form()
        upload_file = form.get("file")
        # Form values are either uploaded files or plain strings. A text field
        # named "file" has no filename/read(), so reject it here instead of
        # failing further down with an AttributeError that surfaces as a 500.
        if upload_file is None or isinstance(upload_file, str):
            logger.error("No file provided in upload request")
            return JSONResponse({"error": "Missing file"}, status_code=400)

        logger.debug(
            "Processing file", filename=upload_file.filename, size=upload_file.size
        )

        # The service expects a (filename, content, content_type) tuple, so
        # the whole upload is read into memory before it is forwarded.
        content = await upload_file.read()
        file_tuple = (
            upload_file.filename,
            content,
            upload_file.content_type or "application/octet-stream",
        )

        jwt_token = getattr(request.state, "jwt_token", None)
        logger.debug("JWT token status", jwt_present=jwt_token is not None)

        logger.debug("Calling langflow_file_service.upload_user_file")
        result = await langflow_file_service.upload_user_file(file_tuple, jwt_token)
        logger.debug("Upload successful", result=result)
        return JSONResponse(result, status_code=201)
    except Exception as e:
        # Endpoint boundary: log the failure with its traceback and report it
        # to the client rather than letting the exception propagate.
        logger.error(
            "upload_user_file endpoint failed",
            error_type=type(e).__name__,
            error=str(e),
        )
        logger.error("Full traceback", traceback=traceback.format_exc())
        return JSONResponse({"error": str(e)}, status_code=500)
async def run_ingestion(
    request: Request, langflow_file_service: LangflowFileService, session_manager
):
    """Run the Langflow ingestion flow for files named in a JSON request body.

    Body keys read: ``file_paths``, ``file_ids``, ``session_id``, ``tweaks``
    and ``settings``. ``file_ids`` only takes part in validation; it is not
    forwarded to the flow, which always receives ``file_paths``.

    UI ``settings`` are translated into component tweaks:
    ``chunkSize`` / ``chunkOverlap`` / ``separator`` go to the
    ``SplitText-QIKhg`` component and ``embeddingModel`` goes to
    ``OpenAIEmbeddings-joRJ6``. Values already present in ``tweaks`` for other
    keys of those components are kept.

    Args:
        request: Incoming request with a JSON object body.
        langflow_file_service: Service exposing ``run_ingestion_flow``.
        session_manager: Not used by this handler.

    Returns:
        JSONResponse: the flow result (default 200); 400 when the body is not
        a JSON object or names neither ``file_paths`` nor ``file_ids``; 500
        with the stringified exception on any other failure.
    """
    try:
        try:
            payload = await request.json()
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors: the
            # client sent an unparsable body, which is a 400 and not a 500.
            return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
        if not isinstance(payload, dict):
            return JSONResponse(
                {"error": "JSON body must be an object"}, status_code=400
            )

        file_ids = payload.get("file_ids")
        file_paths = payload.get("file_paths") or []
        session_id = payload.get("session_id")
        tweaks = payload.get("tweaks") or {}
        settings = payload.get("settings", {})

        # We assume file_paths is provided. If only file_ids are provided, the
        # client would need to resolve them to paths via the Files API (not
        # implemented here).
        if not file_paths and not file_ids:
            return JSONResponse(
                {"error": "Provide file_paths or file_ids"}, status_code=400
            )

        # Convert UI settings to component tweaks using exact component IDs.
        if settings:
            logger.debug("Applying ingestion settings", settings=settings)

            chunk_size = settings.get("chunkSize")
            chunk_overlap = settings.get("chunkOverlap")
            separator = settings.get("separator")
            # A plain truthiness test would silently drop an explicit overlap
            # of 0 ("no overlap"). Accept numeric zero, while still ignoring
            # None, "" and False like the other settings.
            overlap_given = bool(chunk_overlap) or (
                chunk_overlap == 0 and not isinstance(chunk_overlap, bool)
            )

            # Split Text component tweaks (SplitText-QIKhg)
            if chunk_size or overlap_given or separator:
                split_text = tweaks.setdefault("SplitText-QIKhg", {})
                if chunk_size:
                    split_text["chunk_size"] = chunk_size
                if overlap_given:
                    split_text["chunk_overlap"] = chunk_overlap
                if separator:
                    split_text["separator"] = separator

            # OpenAI Embeddings component tweaks (OpenAIEmbeddings-joRJ6)
            embedding_model = settings.get("embeddingModel")
            if embedding_model:
                tweaks.setdefault("OpenAIEmbeddings-joRJ6", {})[
                    "model"
                ] = embedding_model

            # Note: OpenSearch component tweaks not needed for ingestion
            # (search parameters are for retrieval, not document processing)
            logger.debug("Final tweaks with settings applied", tweaks=tweaks)

        # Include user JWT if available
        jwt_token = getattr(request.state, "jwt_token", None)

        # Extract user info from the User object on the request state
        user = getattr(request.state, "user", None)
        user_id = user.user_id if user else None
        user_name = user.name if user else None
        user_email = user.email if user else None

        if jwt_token:
            # Set auth context for downstream services
            from auth_context import set_auth_context

            set_auth_context(user_id, jwt_token)

        result = await langflow_file_service.run_ingestion_flow(
            file_paths=file_paths,
            jwt_token=jwt_token,
            session_id=session_id,
            tweaks=tweaks,
            owner=user_id,
            owner_name=user_name,
            owner_email=user_email,
            connector_type="local",
        )
        return JSONResponse(result)
    except Exception as e:
        # Endpoint boundary: log like the sibling handlers do, so a 500 seen
        # by a client can be matched to a server-side record.
        logger.error(
            "run_ingestion endpoint failed",
            error_type=type(e).__name__,
            error=str(e),
        )
        return JSONResponse({"error": str(e)}, status_code=500)
async def upload_and_ingest_user_file(
    request: Request, langflow_file_service: LangflowFileService, session_manager, task_service
):
    """Combined upload and ingest endpoint - uses task service for tracking and cancellation.

    Reads the multipart ``file`` field, writes its content to a temporary
    file and hands that path to ``task_service.create_langflow_upload_task``.
    The handler returns as soon as the task has been created.

    Optional form fields: ``session_id``, ``settings`` (JSON), ``tweaks``
    (JSON) and ``delete_after_ingest`` (treated as true unless the value,
    lower-cased, differs from ``"true"``; defaults to ``"true"``).

    Args:
        request: Incoming request carrying the multipart form.
        langflow_file_service: Passed through to the created task.
        session_manager: Passed through to the created task.
        task_service: Service exposing ``create_langflow_upload_task``.

    Returns:
        JSONResponse: 202 with ``task_id``, ``message`` and ``filename`` once
        the task is created; 400 for a missing/non-file ``file`` field or
        invalid ``settings``/``tweaks`` JSON; 401 when no user id is present
        on ``request.state``; 500 with the stringified exception otherwise.
    """
    import json
    import os
    import tempfile
    import traceback

    try:
        logger.debug("upload_and_ingest_user_file endpoint called - using task service")
        form = await request.form()
        upload_file = form.get("file")
        # Form values are either uploaded files or plain strings; a text field
        # named "file" cannot be read as an upload, so treat it as missing
        # instead of failing later with an AttributeError (500).
        if upload_file is None or isinstance(upload_file, str):
            logger.error("No file provided in upload_and_ingest request")
            return JSONResponse({"error": "Missing file"}, status_code=400)

        # Extract optional parameters
        session_id = form.get("session_id")
        settings_json = form.get("settings")
        tweaks_json = form.get("tweaks")
        delete_after_ingest = form.get("delete_after_ingest", "true").lower() == "true"

        # Parse JSON fields if provided. TypeError covers a field that was
        # sent as a file part rather than as text.
        settings = None
        tweaks = None
        if settings_json:
            try:
                settings = json.loads(settings_json)
            except (json.JSONDecodeError, TypeError) as e:
                logger.error("Invalid settings JSON", error=str(e))
                return JSONResponse({"error": "Invalid settings JSON"}, status_code=400)
        if tweaks_json:
            try:
                tweaks = json.loads(tweaks_json)
            except (json.JSONDecodeError, TypeError) as e:
                logger.error("Invalid tweaks JSON", error=str(e))
                return JSONResponse({"error": "Invalid tweaks JSON"}, status_code=400)

        # Get user info from request state
        user = getattr(request.state, "user", None)
        user_id = user.user_id if user else None
        user_name = user.name if user else None
        user_email = user.email if user else None
        jwt_token = getattr(request.state, "jwt_token", None)

        if not user_id:
            return JSONResponse({"error": "User authentication required"}, status_code=401)

        logger.debug(
            "Processing file for task-based upload and ingest",
            filename=upload_file.filename,
            size=upload_file.size,
            session_id=session_id,
            has_settings=bool(settings),
            has_tweaks=bool(tweaks),
            delete_after_ingest=delete_after_ingest,
            user_id=user_id,
        )

        # Read file content
        content = await upload_file.read()

        # Create temporary file for task processing. The original name is kept
        # as a suffix with spaces and "/" neutralised; a missing filename
        # falls back to an empty string instead of crashing on None.
        safe_filename = (upload_file.filename or "").replace(" ", "_").replace("/", "_")
        temp_fd, temp_path = tempfile.mkstemp(suffix=f"_{safe_filename}")

        try:
            # Write content to temp file (closing the file also closes temp_fd)
            with os.fdopen(temp_fd, "wb") as temp_file:
                temp_file.write(content)

            logger.debug("Created temporary file for task processing", temp_path=temp_path)

            # Create langflow upload task for single file
            task_id = await task_service.create_langflow_upload_task(
                user_id=user_id,
                file_paths=[temp_path],
                langflow_file_service=langflow_file_service,
                session_manager=session_manager,
                jwt_token=jwt_token,
                owner_name=user_name,
                owner_email=user_email,
                session_id=session_id,
                tweaks=tweaks,
                settings=settings,
                delete_after_ingest=delete_after_ingest,
            )

            logger.debug("Langflow upload task created successfully", task_id=task_id)

            return JSONResponse(
                {
                    "task_id": task_id,
                    "message": f"Langflow upload task created for file '{upload_file.filename}'",
                    "filename": upload_file.filename,
                },
                status_code=202,
            )  # 202 Accepted for async processing
        except Exception:
            # Task creation failed, so nothing else will use the temp file:
            # remove it. Cleanup stays best-effort (it must not mask the
            # original error) but an unexpected failure is logged.
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                pass  # Already gone - nothing to clean up
            except OSError as cleanup_error:
                logger.error(
                    "Failed to remove temporary file",
                    temp_path=temp_path,
                    error=str(cleanup_error),
                )
            raise
    except Exception as e:
        # Endpoint boundary: log the failure with its traceback and report it
        # to the client rather than letting the exception propagate.
        logger.error(
            "upload_and_ingest_user_file endpoint failed",
            error_type=type(e).__name__,
            error=str(e),
        )
        logger.error("Full traceback", traceback=traceback.format_exc())
        return JSONResponse({"error": str(e)}, status_code=500)
async def delete_user_files(
    request: Request, langflow_file_service: LangflowFileService, session_manager
):
    """Delete the files listed under ``file_ids`` in the JSON request body.

    Each id is passed to ``langflow_file_service.delete_user_file`` in turn.
    A failure for one id is recorded and does not stop the remaining ids
    from being attempted.

    Args:
        request: Incoming request whose JSON body holds a ``file_ids`` list.
        langflow_file_service: Service exposing ``delete_user_file``.
        session_manager: Not used by this handler.

    Returns:
        JSONResponse: ``{"deleted": [...], "errors": [...]}`` with status 200
        when every deletion succeeded and 207 when at least one failed; 400
        when the body is not valid JSON or ``file_ids`` is not a non-empty
        list; 500 with the stringified exception on any other failure.
    """
    try:
        try:
            payload = await request.json()
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors: the
            # client sent an unparsable body, which is a 400 and not a 500.
            return JSONResponse({"error": "Invalid JSON body"}, status_code=400)

        # A non-object body has no file_ids; let it hit the same 400 below.
        file_ids = payload.get("file_ids") if isinstance(payload, dict) else None
        if not file_ids or not isinstance(file_ids, list):
            return JSONResponse(
                {"error": "file_ids must be a non-empty list"}, status_code=400
            )

        errors = []
        for fid in file_ids:
            try:
                await langflow_file_service.delete_user_file(fid)
            except Exception as e:
                logger.error(
                    "Failed to delete user file",
                    file_id=fid,
                    error_type=type(e).__name__,
                    error=str(e),
                )
                errors.append({"file_id": fid, "error": str(e)})

        # Build the failed-id list once instead of once per requested id. A
        # list (not a set) is kept so ids need not be hashable.
        failed_ids = [err["file_id"] for err in errors]
        deleted = [fid for fid in file_ids if fid not in failed_ids]

        # 207 Multi-Status signals a partial failure to the client.
        status = 207 if errors else 200
        return JSONResponse(
            {"deleted": deleted, "errors": errors},
            status_code=status,
        )
    except Exception as e:
        logger.error(
            "delete_user_files endpoint failed",
            error_type=type(e).__name__,
            error=str(e),
        )
        return JSONResponse({"error": str(e)}, status_code=500)