improve embedding generation timeout handling w/ retry and error handling

2025-10-11 01:06:14 -04:00 · 2025-10-11 01:06:14 -04:00 · a424bb422a
commit a424bb422a
parent 0c696afef8
7 changed files with 91 additions and 31 deletions
--- a/flows/components/opensearch.py
+++ b/flows/components/opensearch.py
@ -660,10 +660,17 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent):
            msg = "Embedding handle is required to embed documents."
            raise ValueError(msg)
-        # Generate embeddings (threaded for concurrency)
+        # Generate embeddings (threaded for concurrency) with retries
        def embed_chunk(chunk_text: str) -> list[float]:
            return self.embedding.embed_documents([chunk_text])[0]
        vectors: list[list[float]] | None = None
        last_exception: Exception | None = None
        delay = 1.0
        attempts = 0
        while attempts < 3:
            attempts += 1
            try:
                max_workers = min(max(len(texts), 1), 8)
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
@ -672,12 +679,28 @@ class OpenSearchVectorStoreComponent(LCVectorStoreComponent):
                    for future in as_completed(futures):
                        idx = futures[future]
                        vectors[idx] = future.result()
                break
            except Exception as exc:
-            logger.warning(
+                last_exception = exc
-                "Threaded embedding generation failed, falling back to synchronous mode: %s",
+                if attempts >= 3:
-                exc,
+                    logger.error(
                        "Embedding generation failed after retries",
                        error=str(exc),
                    )
                    raise
                logger.warning(
                    "Threaded embedding generation failed (attempt %s/%s), retrying in %.1fs",
                    attempts,
                    3,
                    delay,
                )
                time.sleep(delay)
                delay = min(delay * 2, 8.0)
        if vectors is None:
            raise RuntimeError(
                f"Embedding generation failed: {last_exception}" if last_exception else "Embedding generation failed"
            )
            vectors = self.embedding.embed_documents(texts)
        if not vectors:
            self.log("No vectors generated from documents.")
--- a/flows/ingestion_flow.json
+++ b/flows/ingestion_flow.json
--- a/flows/openrag_agent.json
+++ b/flows/openrag_agent.json
--- a/flows/openrag_ingest_docling.json
+++ b/flows/openrag_ingest_docling.json
--- a/flows/openrag_nudges.json
+++ b/flows/openrag_nudges.json
--- a/flows/openrag_url_mcp.json
+++ b/flows/openrag_url_mcp.json
--- a/src/services/search_service.py
+++ b/src/services/search_service.py
@ -7,6 +7,10 @@ from utils.logging_config import get_logger
 logger = get_logger(__name__)
 MAX_EMBED_RETRIES = 3
 EMBED_RETRY_INITIAL_DELAY = 1.0
 EMBED_RETRY_MAX_DELAY = 8.0
 class SearchService:
    def __init__(self, session_manager=None):
@ -137,20 +141,53 @@ class SearchService:
            import asyncio
            async def embed_with_model(model_name):
                delay = EMBED_RETRY_INITIAL_DELAY
                attempts = 0
                last_exception = None
                while attempts < MAX_EMBED_RETRIES:
                    attempts += 1
                    try:
                        resp = await clients.patched_async_client.embeddings.create(
                            model=model_name, input=[query]
                        )
                        return model_name, resp.data[0].embedding
                    except Exception as e:
-                    logger.error(f"Failed to embed with model {model_name}", error=str(e))
+                        last_exception = e
-                    return model_name, None
+                        if attempts >= MAX_EMBED_RETRIES:
                            logger.error(
                                "Failed to embed with model after retries",
                                model=model_name,
                                attempts=attempts,
                                error=str(e),
                            )
                            raise RuntimeError(
                                f"Failed to embed with model {model_name}"
                            ) from e
                        logger.warning(
                            "Retrying embedding generation",
                            model=model_name,
                            attempt=attempts,
                            max_attempts=MAX_EMBED_RETRIES,
                            error=str(e),
                        )
                        await asyncio.sleep(delay)
                        delay = min(delay * 2, EMBED_RETRY_MAX_DELAY)
                # Should not reach here, but guard in case
                raise RuntimeError(
                    f"Failed to embed with model {model_name}"
                ) from last_exception
            # Run all embeddings in parallel
            try:
                embedding_results = await asyncio.gather(
-                *[embed_with_model(model) for model in available_models],
+                    *[embed_with_model(model) for model in available_models]
                return_exceptions=True
                )
            except Exception as e:
                logger.error("Embedding generation failed", error=str(e))
                raise
            # Collect successful embeddings
            for result in embedding_results: