Merge pull request #631 from langflow-ai/fix-large-pdf-ingest
fix: Configurable ingestion timeout limits
This commit is contained in:
commit
8fdeafb608
2 changed files with 29 additions and 2 deletions
|
|
@ -2,6 +2,14 @@
|
||||||
# Set to true to disable Langflow ingestion and use traditional OpenRAG processor
|
# Set to true to disable Langflow ingestion and use traditional OpenRAG processor
|
||||||
# If unset or false, Langflow pipeline will be used (default: upload -> ingest -> delete)
|
# If unset or false, Langflow pipeline will be used (default: upload -> ingest -> delete)
|
||||||
DISABLE_INGEST_WITH_LANGFLOW=false
|
DISABLE_INGEST_WITH_LANGFLOW=false
|
||||||
|
|
||||||
|
# Langflow HTTP timeout configuration (in seconds)
|
||||||
|
# For large documents (300+ pages), ingestion can take 30+ minutes
|
||||||
|
# Increase these values if you experience timeouts with very large PDFs
|
||||||
|
# Defaults: LANGFLOW_TIMEOUT=2400 seconds (40 minutes, used as the read timeout), LANGFLOW_CONNECT_TIMEOUT=30 seconds (connection timeout)
|
||||||
|
# LANGFLOW_TIMEOUT=2400
|
||||||
|
# LANGFLOW_CONNECT_TIMEOUT=30
|
||||||
|
|
||||||
# make one like so https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
|
# make one like so https://docs.langflow.org/api-keys-and-authentication#langflow-secret-key
|
||||||
LANGFLOW_SECRET_KEY=
|
LANGFLOW_SECRET_KEY=
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,12 @@ DISABLE_INGEST_WITH_LANGFLOW = os.getenv(
|
||||||
"DISABLE_INGEST_WITH_LANGFLOW", "false"
|
"DISABLE_INGEST_WITH_LANGFLOW", "false"
|
||||||
).lower() in ("true", "1", "yes")
|
).lower() in ("true", "1", "yes")
|
||||||
|
|
||||||
|
# Langflow HTTP timeout configuration (in seconds)
|
||||||
|
# For large documents (300+ pages), ingestion can take 30+ minutes
|
||||||
|
# Defaults: LANGFLOW_TIMEOUT is 2400 seconds (40 minutes), LANGFLOW_CONNECT_TIMEOUT is 30 seconds
|
||||||
|
LANGFLOW_TIMEOUT = float(os.getenv("LANGFLOW_TIMEOUT", "2400")) # 40 minutes
|
||||||
|
LANGFLOW_CONNECT_TIMEOUT = float(os.getenv("LANGFLOW_CONNECT_TIMEOUT", "30")) # 30 seconds
|
||||||
|
|
||||||
|
|
||||||
def is_no_auth_mode():
|
def is_no_auth_mode():
|
||||||
"""Check if we're running in no-auth mode (OAuth credentials missing)"""
|
"""Check if we're running in no-auth mode (OAuth credentials missing)"""
|
||||||
|
|
@ -317,9 +323,22 @@ class AppClients:
|
||||||
# Initialize document converter
|
# Initialize document converter
|
||||||
self.converter = create_document_converter(ocr_engine=DOCLING_OCR_ENGINE)
|
self.converter = create_document_converter(ocr_engine=DOCLING_OCR_ENGINE)
|
||||||
|
|
||||||
# Initialize Langflow HTTP client
|
# Initialize Langflow HTTP client with extended timeouts for large documents
|
||||||
|
# Use explicit timeout configuration to handle large PDF ingestion (300+ pages)
|
||||||
self.langflow_http_client = httpx.AsyncClient(
|
self.langflow_http_client = httpx.AsyncClient(
|
||||||
base_url=LANGFLOW_URL, timeout=1200.0
|
base_url=LANGFLOW_URL,
|
||||||
|
timeout=httpx.Timeout(
|
||||||
|
timeout=LANGFLOW_TIMEOUT, # Total timeout
|
||||||
|
connect=LANGFLOW_CONNECT_TIMEOUT, # Connection timeout
|
||||||
|
read=LANGFLOW_TIMEOUT, # Read timeout (most important for large PDFs)
|
||||||
|
write=LANGFLOW_CONNECT_TIMEOUT, # Write timeout
|
||||||
|
pool=LANGFLOW_CONNECT_TIMEOUT, # Pool timeout
|
||||||
|
)
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"Initialized Langflow HTTP client with extended timeouts",
|
||||||
|
timeout_seconds=LANGFLOW_TIMEOUT,
|
||||||
|
connect_timeout_seconds=LANGFLOW_CONNECT_TIMEOUT,
|
||||||
)
|
)
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue