Merge branch 'main' into langflow-ingestion-modes

This commit is contained in:
Edwin Jose 2025-09-09 02:42:52 -04:00
commit faac0c912e
7 changed files with 353 additions and 38 deletions

File diff suppressed because one or more lines are too long

View file

@ -1,6 +1,6 @@
[project]
name = "openrag"
version = "0.1.1"
version = "0.1.2"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"

View file

@ -66,6 +66,7 @@ async def connector_sync(request: Request, connector_service, session_manager):
max_files,
jwt_token=jwt_token,
)
task_ids.append(task_id)
return JSONResponse(
{
"task_ids": task_ids,

View file

@ -99,11 +99,17 @@ async def run_ingestion(
logger.debug("Final tweaks with settings applied", tweaks=tweaks)
# Include user JWT if available
jwt_token = getattr(request.state, "jwt_token", None)
# Extract user info from User object
user = getattr(request.state, "user", None)
user_id = user.user_id if user else None
user_name = user.name if user else None
user_email = user.email if user else None
if jwt_token:
# Set auth context for downstream services
from auth_context import set_auth_context
user_id = getattr(request.state, "user_id", None)
set_auth_context(user_id, jwt_token)
result = await langflow_file_service.run_ingestion_flow(
@ -111,6 +117,10 @@ async def run_ingestion(
jwt_token=jwt_token,
session_id=session_id,
tweaks=tweaks,
owner=user_id,
owner_name=user_name,
owner_email=user_email,
connector_type="local",
)
return JSONResponse(result)
except Exception as e:

View file

@ -400,8 +400,9 @@ class GoogleDriveConnector(BaseConnector):
export_mime = self._pick_export_mime(mime_type)
if mime_type.startswith("application/vnd.google-apps."):
# default fallback if not overridden
if not export_mime:
export_mime = "application/pdf"
#if not export_mime:
# export_mime = "application/pdf"
export_mime = "application/pdf"
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
else:

View file

@ -53,9 +53,11 @@ class LangflowConnectorService:
filename=document.filename,
)
suffix = self._get_file_extension(document.mimetype)
# Create temporary file from document content
with tempfile.NamedTemporaryFile(
delete=False, suffix=self._get_file_extension(document.mimetype)
delete=False, suffix=suffix
) as tmp_file:
tmp_file.write(document.content)
tmp_file.flush()
@ -65,7 +67,7 @@ class LangflowConnectorService:
logger.debug("Uploading file to Langflow", filename=document.filename)
content = document.content
file_tuple = (
document.filename,
document.filename.replace(" ", "_").replace("/", "_")+suffix,
content,
document.mimetype or "application/octet-stream",
)
@ -91,7 +93,13 @@ class LangflowConnectorService:
tweaks = {} # Let Langflow handle the ingestion with default settings
ingestion_result = await self.langflow_service.run_ingestion_flow(
file_paths=[langflow_file_path], jwt_token=jwt_token, tweaks=tweaks
file_paths=[langflow_file_path],
jwt_token=jwt_token,
tweaks=tweaks,
owner=owner_user_id,
owner_name=owner_name,
owner_email=owner_email,
connector_type=connector_type,
)
logger.debug("Ingestion flow completed", result=ingestion_result)

View file

@ -33,7 +33,7 @@ class LangflowFileService:
"[LF] Upload failed",
status_code=resp.status_code,
reason=resp.reason_phrase,
body=resp.text[:500],
body=resp.text,
)
resp.raise_for_status()
return resp.json()
@ -63,6 +63,10 @@ class LangflowFileService:
jwt_token: str,
session_id: Optional[str] = None,
tweaks: Optional[Dict[str, Any]] = None,
owner: Optional[str] = None,
owner_name: Optional[str] = None,
owner_email: Optional[str] = None,
connector_type: Optional[str] = None,
) -> Dict[str, Any]:
"""
Trigger the ingestion flow with provided file paths.
@ -91,6 +95,26 @@ class LangflowFileService:
logger.debug("[LF] Added JWT token to tweaks for OpenSearch components")
else:
logger.warning("[LF] No JWT token provided")
# Pass metadata via tweaks to OpenSearch component
metadata_tweaks = []
if owner:
metadata_tweaks.append({"key": "owner", "value": owner})
if owner_name:
metadata_tweaks.append({"key": "owner_name", "value": owner_name})
if owner_email:
metadata_tweaks.append({"key": "owner_email", "value": owner_email})
if connector_type:
metadata_tweaks.append({"key": "connector_type", "value": connector_type})
if metadata_tweaks:
# Initialize the OpenSearch component tweaks if not already present
if "OpenSearchHybrid-Ve6bS" not in tweaks:
tweaks["OpenSearchHybrid-Ve6bS"] = {}
tweaks["OpenSearchHybrid-Ve6bS"]["docs_metadata"] = metadata_tweaks
logger.debug(
"[LF] Added metadata to tweaks", metadata_count=len(metadata_tweaks)
)
if tweaks:
payload["tweaks"] = tweaks
if session_id: