Merge branch 'main' into langflow-ingestion-modes
This commit is contained in:
commit
faac0c912e
7 changed files with 353 additions and 38 deletions
File diff suppressed because one or more lines are too long
|
|
@ -1,6 +1,6 @@
|
||||||
[project]
|
[project]
|
||||||
name = "openrag"
|
name = "openrag"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
description = "Add your description here"
|
description = "Add your description here"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,7 @@ async def connector_sync(request: Request, connector_service, session_manager):
|
||||||
max_files,
|
max_files,
|
||||||
jwt_token=jwt_token,
|
jwt_token=jwt_token,
|
||||||
)
|
)
|
||||||
|
task_ids.append(task_id)
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{
|
{
|
||||||
"task_ids": task_ids,
|
"task_ids": task_ids,
|
||||||
|
|
|
||||||
|
|
@ -99,11 +99,17 @@ async def run_ingestion(
|
||||||
logger.debug("Final tweaks with settings applied", tweaks=tweaks)
|
logger.debug("Final tweaks with settings applied", tweaks=tweaks)
|
||||||
# Include user JWT if available
|
# Include user JWT if available
|
||||||
jwt_token = getattr(request.state, "jwt_token", None)
|
jwt_token = getattr(request.state, "jwt_token", None)
|
||||||
|
|
||||||
|
# Extract user info from User object
|
||||||
|
user = getattr(request.state, "user", None)
|
||||||
|
user_id = user.user_id if user else None
|
||||||
|
user_name = user.name if user else None
|
||||||
|
user_email = user.email if user else None
|
||||||
|
|
||||||
if jwt_token:
|
if jwt_token:
|
||||||
# Set auth context for downstream services
|
# Set auth context for downstream services
|
||||||
from auth_context import set_auth_context
|
from auth_context import set_auth_context
|
||||||
|
|
||||||
user_id = getattr(request.state, "user_id", None)
|
|
||||||
set_auth_context(user_id, jwt_token)
|
set_auth_context(user_id, jwt_token)
|
||||||
|
|
||||||
result = await langflow_file_service.run_ingestion_flow(
|
result = await langflow_file_service.run_ingestion_flow(
|
||||||
|
|
@ -111,6 +117,10 @@ async def run_ingestion(
|
||||||
jwt_token=jwt_token,
|
jwt_token=jwt_token,
|
||||||
session_id=session_id,
|
session_id=session_id,
|
||||||
tweaks=tweaks,
|
tweaks=tweaks,
|
||||||
|
owner=user_id,
|
||||||
|
owner_name=user_name,
|
||||||
|
owner_email=user_email,
|
||||||
|
connector_type="local",
|
||||||
)
|
)
|
||||||
return JSONResponse(result)
|
return JSONResponse(result)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -400,8 +400,9 @@ class GoogleDriveConnector(BaseConnector):
|
||||||
export_mime = self._pick_export_mime(mime_type)
|
export_mime = self._pick_export_mime(mime_type)
|
||||||
if mime_type.startswith("application/vnd.google-apps."):
|
if mime_type.startswith("application/vnd.google-apps."):
|
||||||
# default fallback if not overridden
|
# default fallback if not overridden
|
||||||
if not export_mime:
|
#if not export_mime:
|
||||||
export_mime = "application/pdf"
|
# export_mime = "application/pdf"
|
||||||
|
export_mime = "application/pdf"
|
||||||
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
|
# NOTE: export_media does not accept supportsAllDrives/includeItemsFromAllDrives
|
||||||
request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
|
request = self.service.files().export_media(fileId=file_id, mimeType=export_mime)
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -53,9 +53,11 @@ class LangflowConnectorService:
|
||||||
filename=document.filename,
|
filename=document.filename,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
suffix = self._get_file_extension(document.mimetype)
|
||||||
|
|
||||||
# Create temporary file from document content
|
# Create temporary file from document content
|
||||||
with tempfile.NamedTemporaryFile(
|
with tempfile.NamedTemporaryFile(
|
||||||
delete=False, suffix=self._get_file_extension(document.mimetype)
|
delete=False, suffix=suffix
|
||||||
) as tmp_file:
|
) as tmp_file:
|
||||||
tmp_file.write(document.content)
|
tmp_file.write(document.content)
|
||||||
tmp_file.flush()
|
tmp_file.flush()
|
||||||
|
|
@ -65,7 +67,7 @@ class LangflowConnectorService:
|
||||||
logger.debug("Uploading file to Langflow", filename=document.filename)
|
logger.debug("Uploading file to Langflow", filename=document.filename)
|
||||||
content = document.content
|
content = document.content
|
||||||
file_tuple = (
|
file_tuple = (
|
||||||
document.filename,
|
document.filename.replace(" ", "_").replace("/", "_")+suffix,
|
||||||
content,
|
content,
|
||||||
document.mimetype or "application/octet-stream",
|
document.mimetype or "application/octet-stream",
|
||||||
)
|
)
|
||||||
|
|
@ -91,7 +93,13 @@ class LangflowConnectorService:
|
||||||
tweaks = {} # Let Langflow handle the ingestion with default settings
|
tweaks = {} # Let Langflow handle the ingestion with default settings
|
||||||
|
|
||||||
ingestion_result = await self.langflow_service.run_ingestion_flow(
|
ingestion_result = await self.langflow_service.run_ingestion_flow(
|
||||||
file_paths=[langflow_file_path], jwt_token=jwt_token, tweaks=tweaks
|
file_paths=[langflow_file_path],
|
||||||
|
jwt_token=jwt_token,
|
||||||
|
tweaks=tweaks,
|
||||||
|
owner=owner_user_id,
|
||||||
|
owner_name=owner_name,
|
||||||
|
owner_email=owner_email,
|
||||||
|
connector_type=connector_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug("Ingestion flow completed", result=ingestion_result)
|
logger.debug("Ingestion flow completed", result=ingestion_result)
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ class LangflowFileService:
|
||||||
"[LF] Upload failed",
|
"[LF] Upload failed",
|
||||||
status_code=resp.status_code,
|
status_code=resp.status_code,
|
||||||
reason=resp.reason_phrase,
|
reason=resp.reason_phrase,
|
||||||
body=resp.text[:500],
|
body=resp.text,
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return resp.json()
|
return resp.json()
|
||||||
|
|
@ -63,6 +63,10 @@ class LangflowFileService:
|
||||||
jwt_token: str,
|
jwt_token: str,
|
||||||
session_id: Optional[str] = None,
|
session_id: Optional[str] = None,
|
||||||
tweaks: Optional[Dict[str, Any]] = None,
|
tweaks: Optional[Dict[str, Any]] = None,
|
||||||
|
owner: Optional[str] = None,
|
||||||
|
owner_name: Optional[str] = None,
|
||||||
|
owner_email: Optional[str] = None,
|
||||||
|
connector_type: Optional[str] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Trigger the ingestion flow with provided file paths.
|
Trigger the ingestion flow with provided file paths.
|
||||||
|
|
@ -91,6 +95,26 @@ class LangflowFileService:
|
||||||
logger.debug("[LF] Added JWT token to tweaks for OpenSearch components")
|
logger.debug("[LF] Added JWT token to tweaks for OpenSearch components")
|
||||||
else:
|
else:
|
||||||
logger.warning("[LF] No JWT token provided")
|
logger.warning("[LF] No JWT token provided")
|
||||||
|
|
||||||
|
# Pass metadata via tweaks to OpenSearch component
|
||||||
|
metadata_tweaks = []
|
||||||
|
if owner:
|
||||||
|
metadata_tweaks.append({"key": "owner", "value": owner})
|
||||||
|
if owner_name:
|
||||||
|
metadata_tweaks.append({"key": "owner_name", "value": owner_name})
|
||||||
|
if owner_email:
|
||||||
|
metadata_tweaks.append({"key": "owner_email", "value": owner_email})
|
||||||
|
if connector_type:
|
||||||
|
metadata_tweaks.append({"key": "connector_type", "value": connector_type})
|
||||||
|
|
||||||
|
if metadata_tweaks:
|
||||||
|
# Initialize the OpenSearch component tweaks if not already present
|
||||||
|
if "OpenSearchHybrid-Ve6bS" not in tweaks:
|
||||||
|
tweaks["OpenSearchHybrid-Ve6bS"] = {}
|
||||||
|
tweaks["OpenSearchHybrid-Ve6bS"]["docs_metadata"] = metadata_tweaks
|
||||||
|
logger.debug(
|
||||||
|
"[LF] Added metadata to tweaks", metadata_count=len(metadata_tweaks)
|
||||||
|
)
|
||||||
if tweaks:
|
if tweaks:
|
||||||
payload["tweaks"] = tweaks
|
payload["tweaks"] = tweaks
|
||||||
if session_id:
|
if session_id:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue