Merge branch 'main' into langflow-ingestion-modes
This commit is contained in:
commit
faac0c912e
7 changed files with 353 additions and 38 deletions
File diff suppressed because one or more lines are too long
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "openrag"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.13"
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ async def connector_sync(request: Request, connector_service, session_manager):
|
|||
max_files,
|
||||
jwt_token=jwt_token,
|
||||
)
|
||||
task_ids.append(task_id)
|
||||
return JSONResponse(
|
||||
{
|
||||
"task_ids": task_ids,
|
||||
|
|
|
|||
|
|
@ -99,11 +99,17 @@ async def run_ingestion(
|
|||
logger.debug("Final tweaks with settings applied", tweaks=tweaks)
|
||||
# Include user JWT if available
|
||||
jwt_token = getattr(request.state, "jwt_token", None)
|
||||
|
||||
# Extract user info from User object
|
||||
user = getattr(request.state, "user", None)
|
||||
user_id = user.user_id if user else None
|
||||
user_name = user.name if user else None
|
||||
user_email = user.email if user else None
|
||||
|
||||
if jwt_token:
|
||||
# Set auth context for downstream services
|
||||
from auth_context import set_auth_context
|
||||
|
||||
user_id = getattr(request.state, "user_id", None)
|
||||
set_auth_context(user_id, jwt_token)
|
||||
|
||||
result = await langflow_file_service.run_ingestion_flow(
|
||||
|
|
@ -111,6 +117,10 @@ async def run_ingestion(
|
|||
jwt_token=jwt_token,
|
||||
session_id=session_id,
|
||||
tweaks=tweaks,
|
||||
owner=user_id,
|
||||
owner_name=user_name,
|
||||
owner_email=user_email,
|
||||
connector_type="local",
|
||||
)
|
||||
return JSONResponse(result)
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -400,8 +400,9 @@ class GoogleDriveConnector(BaseConnector):
|
|||
export_mime = self._pick_export_mime(mime_type)
|
||||
if mime_type.startswith("application/vnd.google-apps."):
|
||||
# default fallback if not overridden
|
||||
if not export_mime:
|
||||
export_mime = "application/pdf"
|
||||
#if not export_mime:
|
||||
# export_mime = "application/pdf"
|
||||
export_mime = "application/pdf"
|
||||
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
|
||||
request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -53,9 +53,11 @@ class LangflowConnectorService:
|
|||
filename=document.filename,
|
||||
)
|
||||
|
||||
suffix = self._get_file_extension(document.mimetype)
|
||||
|
||||
# Create temporary file from document content
|
||||
with tempfile.NamedTemporaryFile(
|
||||
delete=False, suffix=self._get_file_extension(document.mimetype)
|
||||
delete=False, suffix=suffix
|
||||
) as tmp_file:
|
||||
tmp_file.write(document.content)
|
||||
tmp_file.flush()
|
||||
|
|
@ -65,7 +67,7 @@ class LangflowConnectorService:
|
|||
logger.debug("Uploading file to Langflow", filename=document.filename)
|
||||
content = document.content
|
||||
file_tuple = (
|
||||
document.filename,
|
||||
document.filename.replace(" ", "_").replace("/", "_")+suffix,
|
||||
content,
|
||||
document.mimetype or "application/octet-stream",
|
||||
)
|
||||
|
|
@ -91,7 +93,13 @@ class LangflowConnectorService:
|
|||
tweaks = {} # Let Langflow handle the ingestion with default settings
|
||||
|
||||
ingestion_result = await self.langflow_service.run_ingestion_flow(
|
||||
file_paths=[langflow_file_path], jwt_token=jwt_token, tweaks=tweaks
|
||||
file_paths=[langflow_file_path],
|
||||
jwt_token=jwt_token,
|
||||
tweaks=tweaks,
|
||||
owner=owner_user_id,
|
||||
owner_name=owner_name,
|
||||
owner_email=owner_email,
|
||||
connector_type=connector_type,
|
||||
)
|
||||
|
||||
logger.debug("Ingestion flow completed", result=ingestion_result)
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class LangflowFileService:
|
|||
"[LF] Upload failed",
|
||||
status_code=resp.status_code,
|
||||
reason=resp.reason_phrase,
|
||||
body=resp.text[:500],
|
||||
body=resp.text,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
|
@ -63,6 +63,10 @@ class LangflowFileService:
|
|||
jwt_token: str,
|
||||
session_id: Optional[str] = None,
|
||||
tweaks: Optional[Dict[str, Any]] = None,
|
||||
owner: Optional[str] = None,
|
||||
owner_name: Optional[str] = None,
|
||||
owner_email: Optional[str] = None,
|
||||
connector_type: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Trigger the ingestion flow with provided file paths.
|
||||
|
|
@ -91,6 +95,26 @@ class LangflowFileService:
|
|||
logger.debug("[LF] Added JWT token to tweaks for OpenSearch components")
|
||||
else:
|
||||
logger.warning("[LF] No JWT token provided")
|
||||
|
||||
# Pass metadata via tweaks to OpenSearch component
|
||||
metadata_tweaks = []
|
||||
if owner:
|
||||
metadata_tweaks.append({"key": "owner", "value": owner})
|
||||
if owner_name:
|
||||
metadata_tweaks.append({"key": "owner_name", "value": owner_name})
|
||||
if owner_email:
|
||||
metadata_tweaks.append({"key": "owner_email", "value": owner_email})
|
||||
if connector_type:
|
||||
metadata_tweaks.append({"key": "connector_type", "value": connector_type})
|
||||
|
||||
if metadata_tweaks:
|
||||
# Initialize the OpenSearch component tweaks if not already present
|
||||
if "OpenSearchHybrid-Ve6bS" not in tweaks:
|
||||
tweaks["OpenSearchHybrid-Ve6bS"] = {}
|
||||
tweaks["OpenSearchHybrid-Ve6bS"]["docs_metadata"] = metadata_tweaks
|
||||
logger.debug(
|
||||
"[LF] Added metadata to tweaks", metadata_count=len(metadata_tweaks)
|
||||
)
|
||||
if tweaks:
|
||||
payload["tweaks"] = tweaks
|
||||
if session_id:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue