let the agent do full os queries

2025-12-10 12:15:19 -05:00 · 2025-12-10 12:15:19 -05:00 · ab0c734595
commit ab0c734595
parent 015a4d955e
5 changed files with 134 additions and 90 deletions
--- a/flows/components/opensearch_multimodel.py
+++ b/flows/components/opensearch_multimodel.py
@ -12,7 +12,18 @@ from opensearchpy.exceptions import OpenSearchException, RequestError
 from lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
 from lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection
-from lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput
+from lfx.inputs.inputs import DictInput
 from lfx.io import (
    BoolInput,
    DropdownInput,
    HandleInput,
    IntInput,
    MultilineInput,
    Output,
    SecretStrInput,
    StrInput,
    TableInput,
 )
 from lfx.log import logger
 from lfx.schema.data import Data
@ -85,6 +96,32 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
    icon: str = "OpenSearch"
    description: str = (
        "Store and search documents using OpenSearch with multi-model hybrid semantic and keyword search."
        "To search use the tools search_documents and raw_search. Search documents takes a query for vector search, for example\n"
        "  {search_query: \"components in openrag\"}"
        "\n"
        "you can also override the filter_expression to limit the hybrid query in search_documents by also passing filter_expression\n"
        "for example:\n"
        "  {search_query: \"components in openrag\",  filter_expression: {\"data_sources\":[\"my_doc.md\"],\"document_types\":[\"*\"],\"owners\":[\"*\"],\"connector_types\":[\"*\"]},\"limit\":10,\"scoreThreshold\":0}"
        "\n"
        "raw_search takes actual opensearch queries for example:"
        "  {"
        "    \"size\": 100,"
        "    \"query\": {"
        "        \"term\": {\"filename\": \"AmericanAirlines-2017_4.md\"}"
        "    }"
        "    \"_source\": [\"filename\", \"text\", \"page\"]"
        " }"
        "\n"
        "or:"
        "\n"
        "  {"
        "     \"size\": 0,"
        "     \"aggs\": {"
        "         \"distinct_filenames\": {"
        "             \"cardinality\": {\"field\": \"filename\"}"
        "         }"
        "     },"
        "  }"
    )
    # Keys we consider baseline
@ -325,7 +362,55 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
                "Disable for self-signed certificates in development environments."
            ),
        ),
        # DictInput(name="query", display_name="Query", input_types=["Data"], is_list=False, tool_mode=True),
    ]
    outputs = [
        Output(
            display_name="Search Results",
            name="search_results",
            method="search_documents",
        ),
        Output(display_name="DataFrame", name="dataframe", method="as_dataframe"),
        Output(display_name="Raw Search", name="raw_search", method="raw_search"),
    ]
    def raw_search(self, query: str | None = None) -> Data:
        """Execute a raw OpenSearch query against the target index.
        Args:
            query (dict[str, Any]): The OpenSearch query DSL dictionary.
        Returns:
            Data: Search results as a Data object.
        Raises:
            ValueError: If 'query' is not a valid OpenSearch query (must be a non-empty dict).
        """
        query = self.search_query
        if isinstance(query, str):
            query = json.loads(query)
        client = self.build_client()
        logger.info(f"query: {query}")
        resp = client.search(
            index=self.index_name,
            body=query,
            params={"terminate_after": 0},
        )
        # Remove any _source keys whose value is a list of floats (embedding vectors)
        def is_vector(val):
            # Accepts if it's a list of numbers (float or int) and has reasonable vector length (>3)
            return (
                isinstance(val, list) and len(val) > 100 and all(isinstance(x, (float, int)) for x in val)
            )
        if "hits" in resp and "hits" in resp["hits"]:
            for hit in resp["hits"]["hits"]:
                source = hit.get("_source")
                if isinstance(source, dict):
                    keys_to_remove = [k for k, v in source.items() if is_vector(v)]
                    for k in keys_to_remove:
                        source.pop(k)
        logger.info(f"Raw search response (all embedding vectors removed): {resp}")
        return Data(**resp)
    def _get_embedding_model_name(self, embedding_obj=None) -> str:
        """Get the embedding model name from component config or embedding object.
@ -865,99 +950,58 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
            metadatas.append(data_copy)
        self.log(metadatas)
-        # Generate embeddings with rate-limit-aware retry logic using tenacity
+        # Generate embeddings (threaded for concurrency) with retries
-        from tenacity import (
+        def embed_chunk(chunk_text: str) -> list[float]:
-            retry,
+            return selected_embedding.embed_documents([chunk_text])[0]
            retry_if_exception,
            stop_after_attempt,
            wait_exponential,
        )
-        def is_rate_limit_error(exception: Exception) -> bool:
+        vectors: list[list[float]] | None = None
-            """Check if exception is a rate limit error (429)."""
+        last_exception: Exception | None = None
-            error_str = str(exception).lower()
+        delay = 1.0
-            return "429" in error_str or "rate_limit" in error_str or "rate limit" in error_str
+        attempts = 0
-
+        max_attempts = 3
        def is_other_retryable_error(exception: Exception) -> bool:
            """Check if exception is retryable but not a rate limit error."""
            # Retry on most exceptions except for specific non-retryable ones
            # Add other non-retryable exceptions here if needed
            return not is_rate_limit_error(exception)
        # Create retry decorator for rate limit errors (longer backoff)
        retry_on_rate_limit = retry(
            retry=retry_if_exception(is_rate_limit_error),
            stop=stop_after_attempt(5),
            wait=wait_exponential(multiplier=2, min=2, max=30),
            reraise=True,
            before_sleep=lambda retry_state: logger.warning(
                f"Rate limit hit for chunk (attempt {retry_state.attempt_number}/5), "
                f"backing off for {retry_state.next_action.sleep:.1f}s"
            ),
        )
        # Create retry decorator for other errors (shorter backoff)
        retry_on_other_errors = retry(
            retry=retry_if_exception(is_other_retryable_error),
            stop=stop_after_attempt(3),
            wait=wait_exponential(multiplier=1, min=1, max=8),
            reraise=True,
            before_sleep=lambda retry_state: logger.warning(
                f"Error embedding chunk (attempt {retry_state.attempt_number}/3), "
                f"retrying in {retry_state.next_action.sleep:.1f}s: {retry_state.outcome.exception()}"
            ),
        )
        def embed_chunk_with_retry(chunk_text: str, chunk_idx: int) -> list[float]:
            """Embed a single chunk with rate-limit-aware retry logic."""
            @retry_on_rate_limit
            @retry_on_other_errors
            def _embed(text: str) -> list[float]:
                return selected_embedding.embed_documents([text])[0]
        while attempts < max_attempts:
            attempts += 1
            try:
-                return _embed(chunk_text)
+                # Restrict concurrency for IBM/Watsonx models to avoid rate limits
-            except Exception as e:
+                is_ibm = (embedding_model and "ibm" in str(embedding_model).lower()) or (
-                logger.error(
+                    selected_embedding and "watsonx" in type(selected_embedding).__name__.lower()
                    f"Failed to embed chunk {chunk_idx} after all retries: {e}",
                    error=str(e),
                )
-                raise
+                logger.debug(f"Is IBM: {is_ibm}")
                max_workers = 1 if is_ibm else min(max(len(texts), 1), 8)
-        # Restrict concurrency for IBM/Watsonx models to avoid rate limits
+                with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        is_ibm = (embedding_model and "ibm" in str(embedding_model).lower()) or (
+                    futures = {executor.submit(embed_chunk, chunk): idx for idx, chunk in enumerate(texts)}
-            selected_embedding and "watsonx" in type(selected_embedding).__name__.lower()
+                    vectors = [None] * len(texts)
-        )
+                    for future in as_completed(futures):
-        logger.debug(f"Is IBM: {is_ibm}")
+                        idx = futures[future]
                        vectors[idx] = future.result()
                break
            except Exception as exc:
                last_exception = exc
                if attempts >= max_attempts:
                    logger.error(
                        f"Embedding generation failed for model {embedding_model} after retries",
                        error=str(exc),
                    )
                    raise
                logger.warning(
                    "Threaded embedding generation failed for model %s (attempt %s/%s), retrying in %.1fs",
                    embedding_model,
                    attempts,
                    max_attempts,
                    delay,
                )
                time.sleep(delay)
                delay = min(delay * 2, 8.0)
-        # For IBM models, use sequential processing with rate limiting
+        if vectors is None:
-        # For other models, use parallel processing
+            raise RuntimeError(
-        vectors: list[list[float]] = [None] * len(texts)
+                f"Embedding generation failed for {embedding_model}: {last_exception}"
-
+                if last_exception
-        if is_ibm:
+                else f"Embedding generation failed for {embedding_model}"
            # Sequential processing with inter-request delay for IBM models
            inter_request_delay = 0.6  # ~1.67 req/s, safely under 2 req/s limit
            logger.info(
                f"Using sequential processing for IBM model with {inter_request_delay}s delay between requests"
            )
            for idx, chunk in enumerate(texts):
                if idx > 0:
                    # Add delay between requests (but not before the first one)
                    time.sleep(inter_request_delay)
                vectors[idx] = embed_chunk_with_retry(chunk, idx)
        else:
            # Parallel processing for non-IBM models
            max_workers = min(max(len(texts), 1), 8)
            logger.debug(f"Using parallel processing with {max_workers} workers")
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                futures = {executor.submit(embed_chunk_with_retry, chunk, idx): idx for idx, chunk in enumerate(texts)}
                for future in as_completed(futures):
                    idx = futures[future]
                    vectors[idx] = future.result()
        if not vectors:
            self.log(f"No vectors generated from documents for model {embedding_model}.")
            return
--- a/flows/ingestion_flow.json
+++ b/flows/ingestion_flow.json
--- a/flows/openrag_agent.json
+++ b/flows/openrag_agent.json
--- a/flows/openrag_nudges.json
+++ b/flows/openrag_nudges.json
--- a/flows/openrag_url_mcp.json
+++ b/flows/openrag_url_mcp.json