From af741f37847cbf672dc77444e00bdbcf794e3ea5 Mon Sep 17 00:00:00 2001
From: phact <estevezsebastian@gmail.com>
Date: Fri, 10 Oct 2025 22:46:36 -0400
Subject: [PATCH] other flows

---
 flows/openrag_agent.json          |   4 +-
 flows/openrag_ingest_docling.json |   4 +-
 flows/openrag_nudges.json         |   4 +-
 flows/openrag_url_mcp.json        |   4 +-
 scripts/extract_flow_component.py | 174 ++++++++++++++++++++++++++++++
 scripts/update_flow_components.py | 138 ++++++++++++++++++++++++
 6 files changed, 320 insertions(+), 8 deletions(-)
 create mode 100644 scripts/extract_flow_component.py
 create mode 100644 scripts/update_flow_components.py

diff --git a/flows/openrag_agent.json b/flows/openrag_agent.json
index bb02b425..1bd99bb6 100644
--- a/flows/openrag_agent.json
+++ b/flows/openrag_agent.json
@@ -860,7 +860,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n    This component provides vector storage and retrieval using OpenSearch, combining semantic\n    similarity search (KNN) with keyword-based search for optimal results. It supports document\n    ingestion, vector embeddings, and advanced filtering with authentication options.\n\n    Features:\n    - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n    - Hybrid search combining KNN vector similarity and keyword matching\n    - Flexible authentication (Basic auth, JWT tokens)\n    - Advanced filtering and aggregations\n    - Metadata injection during document ingestion\n    \"\"\"\n\n    display_name: str = \"OpenSearch\"\n    icon: str = \"OpenSearch\"\n    description: str = (\n        \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n    )\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[i.name for i in LCVectorStoreComponent.inputs],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        \"jwt_header\",\n        \"bearer_prefix\",\n        
\"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Document Metadata\",\n            info=(\n                \"Additional metadata key-value pairs to be added to all ingested documents. \"\n                \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n            ),\n            table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                },\n            ],\n            value=[],\n            # advanced=True,\n            input_types=[\"Data\"]\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=(\n                \"The connection URL for your OpenSearch cluster \"\n                \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n            ),\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=(\n                \"The OpenSearch index name where documents will be stored and searched. 
\"\n                \"Will be created automatically if it doesn't exist.\"\n            ),\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Vector Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=(\n                \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n                \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n            ),\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Distance Metric\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=(\n                \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n                \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=(\n                \"Size of the dynamic candidate list during index construction. \"\n                \"Higher values improve recall but increase indexing time and memory usage.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=(\n                \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n                \"Higher values improve search quality but increase memory usage and indexing time.\"\n            ),\n            advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field Name\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Result Limit\",\n            value=10,\n            advanced=True,\n            info=(\n                \"Default maximum number of search results to return when no limit is \"\n                \"specified in the filter expression.\"\n            ),\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Search Filters (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n                \"Format 1 - Explicit filters:\\n\"\n                '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n                '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n                \"Format 2 - Context-style mapping:\\n\"\n                '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n                \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Authentication Mode\",\n          
  value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=(\n                \"Authentication method: 'basic' for username/password authentication, \"\n                \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n            ),\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"password\",\n            display_name=\"OpenSearch Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=False,\n            show=True,\n            info=(\n                \"Valid JSON Web Token for authentication. \"\n                \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n            ),\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(\n            name=\"use_ssl\",\n            display_name=\"Use SSL/TLS\",\n            value=True,\n            advanced=True,\n            info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n        ),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify SSL Certificates\",\n            value=False,\n            advanced=True,\n            info=(\n                \"Verify SSL certificates when connecting. 
\"\n                \"Disable for self-signed certificates in development environments.\"\n            ),\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> dict[str, Any]:\n        \"\"\"Create the default OpenSearch index mapping for vector search.\n\n        This method generates the index configuration with k-NN settings optimized\n        for approximate nearest neighbor search using the specified vector engine.\n\n        Args:\n            dim: Dimensionality of the vector embeddings\n            engine: Vector search engine (jvector, nmslib, faiss, lucene)\n            space_type: Distance metric for similarity calculation\n            ef_search: Size of dynamic list used during search\n            ef_construction: Size of dynamic list used during index construction\n            m: Number of bidirectional links for each vector\n            vector_field: Name of the field storing vector embeddings\n\n        Returns:\n            Dictionary containing OpenSearch index mapping configuration\n        \"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                
    }\n                }\n            },\n        }\n\n    def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n        Amazon OpenSearch Serverless has restrictions on which vector engines\n        can be used. This method ensures the selected engine is compatible.\n\n        Args:\n            is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n            engine: The selected vector search engine\n\n        Raises:\n            ValueError: If AOSS is used with an incompatible engine\n        \"\"\"\n        if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n            msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n            raise ValueError(msg)\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n        Args:\n            http_auth: The HTTP authentication object\n\n        Returns:\n            True if AOSS is enabled, False otherwise\n        \"\"\"\n        return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: list[list[float]],\n        texts: list[str],\n        metadatas: list[dict] | None = None,\n        ids: list[str] | None = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: dict | None = None,\n        max_chunk_bytes: int | None = 1 * 1024 * 1024,\n        *,\n        is_aoss: bool = False,\n    ) -> list[str]:\n        \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n        This method uses bulk operations to insert documents with their vector\n        embeddings and metadata into the specified OpenSearch index.\n\n        
Args:\n            client: OpenSearch client instance\n            index_name: Target index for document storage\n            embeddings: List of vector embeddings for each document\n            texts: List of document texts\n            metadatas: Optional metadata dictionaries for each document\n            ids: Optional document IDs (UUIDs generated if not provided)\n            vector_field: Field name for storing vector embeddings\n            text_field: Field name for storing document text\n            mapping: Optional index mapping configuration\n            max_chunk_bytes: Maximum size per bulk request chunk\n            is_aoss: Whether using Amazon OpenSearch Serverless\n\n        Returns:\n            List of document IDs that were successfully ingested\n        \"\"\"\n        if not mapping:\n            mapping = {}\n\n        requests = []\n        return_ids = []\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                **metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        if metadatas:\n            self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> dict[str, Any]:\n        \"\"\"Build authentication configuration for OpenSearch client.\n\n        Constructs the appropriate authentication parameters based on the\n        selected auth mode (basic username/password or JWT 
token).\n\n        Returns:\n            Dictionary containing authentication configuration\n\n        Raises:\n            ValueError: If required authentication parameters are missing\n        \"\"\"\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n                raise ValueError(msg)\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            msg = \"Auth Mode is 'basic' but username/password are missing.\"\n            raise ValueError(msg)\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        \"\"\"Create and configure an OpenSearch client instance.\n\n        Returns:\n            Configured OpenSearch client ready for operations\n        \"\"\"\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        \"\"\"Process and ingest documents into the OpenSearch 
vector store.\n\n        This method handles the complete document ingestion pipeline:\n        - Prepares document data and metadata\n        - Generates vector embeddings\n        - Creates appropriate index mappings\n        - Bulk inserts documents with vectors\n\n        Args:\n            client: OpenSearch client for performing operations\n        \"\"\"\n        # Convert DataFrame to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            logger.info(f\"[LF] Docs metadata {self.docs_metadata}\")\n            if isinstance(self.docs_metadata[-1], Data):\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                self.docs_metadata = self.docs_metadata[-1].data\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                additional_metadata.update(self.docs_metadata)\n            else:\n                for item in self.docs_metadata:\n                    if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                        additional_metadata[item[\"key\"]] = item[\"value\"]\n        logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        
self.log(metadatas)\n        if not self.embedding:\n            msg = \"Embedding handle is required to embed documents.\"\n            raise ValueError(msg)\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n        m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: 
dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n        \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n        This method accepts two filter formats and converts them to standardized\n        OpenSearch query clauses:\n\n        Format A - Explicit filters:\n        {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n         \"limit\": 10, \"score_threshold\": 1.5}\n\n        Format B - Context-style mapping:\n        {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n        Args:\n            filter_obj: Filter configuration dictionary or None\n\n        Returns:\n            List of OpenSearch filter clauses (term/terms objects)\n            Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n        \"\"\"\n        if not filter_obj:\n            return []\n\n        # If it is a string, try to parse it once\n        if isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except json.JSONDecodeError:\n                # Not valid JSON - treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            explicit_clauses: list[dict] = []\n            for f in raw or []:\n                if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n                    explicit_clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n         
           if isinstance(vals, list) and len(vals) > 0:\n                        explicit_clauses.append(f)\n            return explicit_clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        context_clauses: list[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    context_clauses.append({\"term\": {field: values[0]}})\n            else:\n                context_clauses.append({\"terms\": {field: values}})\n        return context_clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def search(self, query: str | None = None) -> list[dict[str, Any]]:\n        \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n        This method executes a sophisticated search that combines:\n        - K-nearest neighbor (KNN) vector similarity search (70% weight)\n        - Multi-field keyword search with fuzzy matching (30% weight)\n        - Optional filtering and score thresholds\n        - Aggregations for faceted search results\n\n        Args:\n            query: Search query string (used for both vector embedding and keyword search)\n\n        Returns:\n            List of search results with page_content, metadata, and relevance scores\n\n        Raises:\n            ValueError: If embedding component is not provided or filter JSON is invalid\n        \"\"\"\n        
logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                msg = f\"Invalid filter_expression JSON: {e}\"\n                raise ValueError(msg) from e\n\n        if not self.embedding:\n            msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n            raise ValueError(msg)\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n        body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                                    \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                                \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": 
\"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if filter_clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def search_documents(self) -> list[Data]:\n        \"\"\"Search documents and return results as Data objects.\n\n        This is the main interface method that performs the search using the\n        configured search_query and returns results in Langflow's Data format.\n\n        Returns:\n            List of Data objects containing search results with text and metadata\n\n        Raises:\n            Exception: If 
search operation fails\n        \"\"\"\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Dynamically update component configuration based on field changes.\n\n        This method handles real-time UI updates, particularly for authentication\n        mode changes that show/hide relevant input fields.\n\n        Args:\n            build_config: Current component configuration\n            field_value: New value for the changed field\n            field_name: Name of the field that changed\n\n        Returns:\n            Updated build configuration with appropriate field visibility\n        \"\"\"\n        try:\n            if field_name == \"auth_mode\":\n                mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n               
     build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n        except (KeyError, ValueError) as e:\n            self.log(f\"update_build_config error: {e}\")\n\n        return build_config\n"
+                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\nfrom opensearchpy.exceptions import RequestError\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n    This component provides vector storage and retrieval using OpenSearch, combining semantic\n    similarity search (KNN) with keyword-based search for optimal results. It supports document\n    ingestion, vector embeddings, and advanced filtering with authentication options.\n\n    Features:\n    - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n    - Hybrid search combining KNN vector similarity and keyword matching\n    - Flexible authentication (Basic auth, JWT tokens)\n    - Advanced filtering and aggregations\n    - Metadata injection during document ingestion\n    \"\"\"\n\n    display_name: str = \"OpenSearch\"\n    icon: str = \"OpenSearch\"\n    description: str = (\n        \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n    )\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[i.name for i in LCVectorStoreComponent.inputs],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        
\"jwt_header\",\n        \"bearer_prefix\",\n        \"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Document Metadata\",\n            info=(\n                \"Additional metadata key-value pairs to be added to all ingested documents. \"\n                \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n            ),\n            table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                },\n            ],\n            value=[],\n            # advanced=True,\n            input_types=[\"Data\"]\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=(\n                \"The connection URL for your OpenSearch cluster \"\n                \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n            ),\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=(\n                \"The OpenSearch index name where documents will be stored and searched. 
\"\n                \"Will be created automatically if it doesn't exist.\"\n            ),\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Vector Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=(\n                \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n                \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n            ),\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Distance Metric\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=(\n                \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n                \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=(\n                \"Size of the dynamic candidate list during index construction. \"\n                \"Higher values improve recall but increase indexing time and memory usage.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=(\n                \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n                \"Higher values improve search quality but increase memory usage and indexing time.\"\n            ),\n            advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field Name\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Result Limit\",\n            value=10,\n            advanced=True,\n            info=(\n                \"Default maximum number of search results to return when no limit is \"\n                \"specified in the filter expression.\"\n            ),\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Search Filters (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n                \"Format 1 - Explicit filters:\\n\"\n                '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n                '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n                \"Format 2 - Context-style mapping:\\n\"\n                '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n                \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Authentication Mode\",\n          
  value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=(\n                \"Authentication method: 'basic' for username/password authentication, \"\n                \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n            ),\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"password\",\n            display_name=\"OpenSearch Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=False,\n            show=True,\n            info=(\n                \"Valid JSON Web Token for authentication. \"\n                \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n            ),\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(\n            name=\"use_ssl\",\n            display_name=\"Use SSL/TLS\",\n            value=True,\n            advanced=True,\n            info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n        ),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify SSL Certificates\",\n            value=False,\n            advanced=True,\n            info=(\n                \"Verify SSL certificates when connecting. 
\"\n                \"Disable for self-signed certificates in development environments.\"\n            ),\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> dict[str, Any]:\n        \"\"\"Create the default OpenSearch index mapping for vector search.\n\n        This method generates the index configuration with k-NN settings optimized\n        for approximate nearest neighbor search using the specified vector engine.\n\n        Args:\n            dim: Dimensionality of the vector embeddings\n            engine: Vector search engine (jvector, nmslib, faiss, lucene)\n            space_type: Distance metric for similarity calculation\n            ef_search: Size of dynamic list used during search\n            ef_construction: Size of dynamic list used during index construction\n            m: Number of bidirectional links for each vector\n            vector_field: Name of the field storing vector embeddings\n\n        Returns:\n            Dictionary containing OpenSearch index mapping configuration\n        \"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                
    },\n                    \"embedding_dimensions\": {\n                        \"type\": \"integer\"\n                    }\n                }\n            },\n        }\n\n    def _ensure_vector_field_mapping(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        vector_field: str,\n        dim: int,\n        engine: str,\n        space_type: str,\n        ef_construction: int,\n        m: int,\n    ) -> None:\n        \"\"\"Ensure the target vector field exists with the correct mapping.\"\"\"\n        try:\n            mapping = {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                    },\n                    \"embedding_dimensions\": {\n                        \"type\": \"integer\"\n                    }\n                }\n            }\n            client.indices.put_mapping(index=index_name, body=mapping)\n            logger.info(\n                \"Added/updated vector field mapping for %s in index %s\",\n                vector_field,\n                index_name,\n            )\n        except Exception as exc:\n            logger.warning(\n                \"Could not ensure vector field mapping for %s: %s\",\n                vector_field,\n                exc,\n            )\n\n    def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n        Amazon OpenSearch Serverless has restrictions on which vector engines\n        can be used. 
This method ensures the selected engine is compatible.\n\n        Args:\n            is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n            engine: The selected vector search engine\n\n        Raises:\n            ValueError: If AOSS is used with an incompatible engine\n        \"\"\"\n        if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n            msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n            raise ValueError(msg)\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n        Args:\n            http_auth: The HTTP authentication object\n\n        Returns:\n            True if AOSS is enabled, False otherwise\n        \"\"\"\n        return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: list[list[float]],\n        texts: list[str],\n        metadatas: list[dict] | None = None,\n        ids: list[str] | None = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: dict | None = None,\n        max_chunk_bytes: int | None = 1 * 1024 * 1024,\n        *,\n        is_aoss: bool = False,\n    ) -> list[str]:\n        \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n        This method uses bulk operations to insert documents with their vector\n        embeddings and metadata into the specified OpenSearch index.\n\n        Args:\n            client: OpenSearch client instance\n            index_name: Target index for document storage\n            embeddings: List of vector embeddings for each document\n            texts: List of document texts\n            metadatas: Optional metadata dictionaries for each document\n            ids: Optional 
document IDs (UUIDs generated if not provided)\n            vector_field: Field name for storing vector embeddings\n            text_field: Field name for storing document text\n            mapping: Optional index mapping configuration\n            max_chunk_bytes: Maximum size per bulk request chunk\n            is_aoss: Whether using Amazon OpenSearch Serverless\n\n        Returns:\n            List of document IDs that were successfully ingested\n        \"\"\"\n        if not mapping:\n            mapping = {}\n\n        requests = []\n        return_ids = []\n        vector_dimensions = len(embeddings[0]) if embeddings else None\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            if vector_dimensions is not None and \"embedding_dimensions\" not in metadata:\n                metadata = {**metadata, \"embedding_dimensions\": vector_dimensions}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                **metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        if metadatas:\n            self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> dict[str, Any]:\n        \"\"\"Build authentication configuration for OpenSearch client.\n\n        Constructs the appropriate authentication parameters based on the\n        selected auth mode (basic username/password or JWT token).\n\n        Returns:\n            Dictionary containing authentication 
configuration\n\n        Raises:\n            ValueError: If required authentication parameters are missing\n        \"\"\"\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n                raise ValueError(msg)\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            msg = \"Auth Mode is 'basic' but username/password are missing.\"\n            raise ValueError(msg)\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        \"\"\"Create and configure an OpenSearch client instance.\n\n        Returns:\n            Configured OpenSearch client ready for operations\n        \"\"\"\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n        This method handles the complete document ingestion 
pipeline:\n        - Prepares document data and metadata\n        - Generates vector embeddings\n        - Creates appropriate index mappings\n        - Bulk inserts documents with vectors\n\n        Args:\n            client: OpenSearch client for performing operations\n        \"\"\"\n        # Convert DataFrame to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            logger.info(f\"[LF] Docs metadata {self.docs_metadata}\")\n            if isinstance(self.docs_metadata[-1], Data):\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                self.docs_metadata = self.docs_metadata[-1].data\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                additional_metadata.update(self.docs_metadata)\n            else:\n                for item in self.docs_metadata:\n                    if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                        additional_metadata[item[\"key\"]] = item[\"value\"]\n        logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        self.log(metadatas)\n        if not self.embedding:\n            msg = 
\"Embedding handle is required to embed documents.\"\n            raise ValueError(msg)\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n        m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        try:\n            if not client.indices.exists(index=self.index_name):\n                self.log(f\"Creating index '{self.index_name}' with base mapping\")\n                client.indices.create(index=self.index_name, body=mapping)\n        except RequestError as creation_error:\n            if getattr(creation_error, \"error\", \"\") != \"resource_already_exists_exception\":\n                logger.warning(\n                    \"Failed to create index %s: %s\",\n                    self.index_name,\n                    creation_error,\n                )\n\n        self._ensure_vector_field_mapping(\n            client=client,\n            index_name=self.index_name,\n            vector_field=self.vector_field,\n            dim=dim,\n            engine=engine,\n            
space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n        )\n\n        self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n        \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n        This method accepts two filter formats and converts them to standardized\n        OpenSearch query clauses:\n\n        Format A - Explicit filters:\n        {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n         \"limit\": 10, \"score_threshold\": 1.5}\n\n        Format B - Context-style mapping:\n        {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n        Args:\n            filter_obj: Filter configuration dictionary or None\n\n        Returns:\n            List of OpenSearch filter clauses (term/terms objects)\n            Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n        \"\"\"\n        if not filter_obj:\n            return []\n\n        # If it is a string, try to parse it once\n        if 
isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except json.JSONDecodeError:\n                # Not valid JSON - treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            explicit_clauses: list[dict] = []\n            for f in raw or []:\n                if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n                    explicit_clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n                    if isinstance(vals, list) and len(vals) > 0:\n                        explicit_clauses.append(f)\n            return explicit_clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        context_clauses: list[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    context_clauses.append({\"term\": {field: values[0]}})\n            else:\n                context_clauses.append({\"terms\": {field: values}})\n        return context_clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def 
search(self, query: str | None = None) -> list[dict[str, Any]]:\n        \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n        This method executes a sophisticated search that combines:\n        - K-nearest neighbor (KNN) vector similarity search (70% weight)\n        - Multi-field keyword search with fuzzy matching (30% weight)\n        - Optional filtering and score thresholds\n        - Aggregations for faceted search results\n\n        Args:\n            query: Search query string (used for both vector embedding and keyword search)\n\n        Returns:\n            List of search results with page_content, metadata, and relevance scores\n\n        Raises:\n            ValueError: If embedding component is not provided or filter JSON is invalid\n        \"\"\"\n        logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                msg = f\"Invalid filter_expression JSON: {e}\"\n                raise ValueError(msg) from e\n\n        if not self.embedding:\n            msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n            raise ValueError(msg)\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n 
       body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                                    \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                                \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": \"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"embedding_dimensions\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if filter_clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = 
score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def search_documents(self) -> list[Data]:\n        \"\"\"Search documents and return results as Data objects.\n\n        This is the main interface method that performs the search using the\n        configured search_query and returns results in Langflow's Data format.\n\n        Returns:\n            List of Data objects containing search results with text and metadata\n\n        Raises:\n            Exception: If search operation fails\n        \"\"\"\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Dynamically update component configuration based on field changes.\n\n        This method handles real-time UI updates, particularly for authentication\n        mode changes that show/hide relevant input fields.\n\n        Args:\n            build_config: Current component configuration\n            field_value: New value for the changed field\n            field_name: Name of the field that changed\n\n        Returns:\n            Updated build configuration with appropriate field visibility\n        \"\"\"\n        try:\n            if field_name == \"auth_mode\":\n               
 mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n                    build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n        except (KeyError, ValueError) as e:\n            self.log(f\"update_build_config error: {e}\")\n\n        return build_config\n"
               },
               "docs_metadata": {
                 "_input_type": "TableInput",
@@ -2842,4 +2842,4 @@
     "assistants",
     "agents"
   ]
-}
\ No newline at end of file
+}
diff --git a/flows/openrag_ingest_docling.json b/flows/openrag_ingest_docling.json
index f0e8b164..7b31bbb3 100644
--- a/flows/openrag_ingest_docling.json
+++ b/flows/openrag_ingest_docling.json
@@ -677,7 +677,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n    This component provides vector storage and retrieval using OpenSearch, combining semantic\n    similarity search (KNN) with keyword-based search for optimal results. It supports document\n    ingestion, vector embeddings, and advanced filtering with authentication options.\n\n    Features:\n    - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n    - Hybrid search combining KNN vector similarity and keyword matching\n    - Flexible authentication (Basic auth, JWT tokens)\n    - Advanced filtering and aggregations\n    - Metadata injection during document ingestion\n    \"\"\"\n\n    display_name: str = \"OpenSearch\"\n    icon: str = \"OpenSearch\"\n    description: str = (\n        \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n    )\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[i.name for i in LCVectorStoreComponent.inputs],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        \"jwt_header\",\n        \"bearer_prefix\",\n        
\"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Document Metadata\",\n            info=(\n                \"Additional metadata key-value pairs to be added to all ingested documents. \"\n                \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n            ),\n            table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                },\n            ],\n            value=[],\n            # advanced=True,\n            input_types=[\"Data\"]\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=(\n                \"The connection URL for your OpenSearch cluster \"\n                \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n            ),\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=(\n                \"The OpenSearch index name where documents will be stored and searched. 
\"\n                \"Will be created automatically if it doesn't exist.\"\n            ),\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Vector Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=(\n                \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n                \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n            ),\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Distance Metric\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=(\n                \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n                \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=(\n                \"Size of the dynamic candidate list during index construction. \"\n                \"Higher values improve recall but increase indexing time and memory usage.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=(\n                \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n                \"Higher values improve search quality but increase memory usage and indexing time.\"\n            ),\n            advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field Name\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Result Limit\",\n            value=10,\n            advanced=True,\n            info=(\n                \"Default maximum number of search results to return when no limit is \"\n                \"specified in the filter expression.\"\n            ),\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Search Filters (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n                \"Format 1 - Explicit filters:\\n\"\n                '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n                '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n                \"Format 2 - Context-style mapping:\\n\"\n                '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n                \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Authentication Mode\",\n          
  value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=(\n                \"Authentication method: 'basic' for username/password authentication, \"\n                \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n            ),\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"password\",\n            display_name=\"OpenSearch Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=False,\n            show=True,\n            info=(\n                \"Valid JSON Web Token for authentication. \"\n                \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n            ),\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(\n            name=\"use_ssl\",\n            display_name=\"Use SSL/TLS\",\n            value=True,\n            advanced=True,\n            info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n        ),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify SSL Certificates\",\n            value=False,\n            advanced=True,\n            info=(\n                \"Verify SSL certificates when connecting. 
\"\n                \"Disable for self-signed certificates in development environments.\"\n            ),\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> dict[str, Any]:\n        \"\"\"Create the default OpenSearch index mapping for vector search.\n\n        This method generates the index configuration with k-NN settings optimized\n        for approximate nearest neighbor search using the specified vector engine.\n\n        Args:\n            dim: Dimensionality of the vector embeddings\n            engine: Vector search engine (jvector, nmslib, faiss, lucene)\n            space_type: Distance metric for similarity calculation\n            ef_search: Size of dynamic list used during search\n            ef_construction: Size of dynamic list used during index construction\n            m: Number of bidirectional links for each vector\n            vector_field: Name of the field storing vector embeddings\n\n        Returns:\n            Dictionary containing OpenSearch index mapping configuration\n        \"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                
    }\n                }\n            },\n        }\n\n    def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n        Amazon OpenSearch Serverless has restrictions on which vector engines\n        can be used. This method ensures the selected engine is compatible.\n\n        Args:\n            is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n            engine: The selected vector search engine\n\n        Raises:\n            ValueError: If AOSS is used with an incompatible engine\n        \"\"\"\n        if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n            msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n            raise ValueError(msg)\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n        Args:\n            http_auth: The HTTP authentication object\n\n        Returns:\n            True if AOSS is enabled, False otherwise\n        \"\"\"\n        return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: list[list[float]],\n        texts: list[str],\n        metadatas: list[dict] | None = None,\n        ids: list[str] | None = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: dict | None = None,\n        max_chunk_bytes: int | None = 1 * 1024 * 1024,\n        *,\n        is_aoss: bool = False,\n    ) -> list[str]:\n        \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n        This method uses bulk operations to insert documents with their vector\n        embeddings and metadata into the specified OpenSearch index.\n\n        
Args:\n            client: OpenSearch client instance\n            index_name: Target index for document storage\n            embeddings: List of vector embeddings for each document\n            texts: List of document texts\n            metadatas: Optional metadata dictionaries for each document\n            ids: Optional document IDs (UUIDs generated if not provided)\n            vector_field: Field name for storing vector embeddings\n            text_field: Field name for storing document text\n            mapping: Optional index mapping configuration\n            max_chunk_bytes: Maximum size per bulk request chunk\n            is_aoss: Whether using Amazon OpenSearch Serverless\n\n        Returns:\n            List of document IDs that were successfully ingested\n        \"\"\"\n        if not mapping:\n            mapping = {}\n\n        requests = []\n        return_ids = []\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                **metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        if metadatas:\n            self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> dict[str, Any]:\n        \"\"\"Build authentication configuration for OpenSearch client.\n\n        Constructs the appropriate authentication parameters based on the\n        selected auth mode (basic username/password or JWT 
token).\n\n        Returns:\n            Dictionary containing authentication configuration\n\n        Raises:\n            ValueError: If required authentication parameters are missing\n        \"\"\"\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n                raise ValueError(msg)\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            msg = \"Auth Mode is 'basic' but username/password are missing.\"\n            raise ValueError(msg)\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        \"\"\"Create and configure an OpenSearch client instance.\n\n        Returns:\n            Configured OpenSearch client ready for operations\n        \"\"\"\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        \"\"\"Process and ingest documents into the OpenSearch 
vector store.\n\n        This method handles the complete document ingestion pipeline:\n        - Prepares document data and metadata\n        - Generates vector embeddings\n        - Creates appropriate index mappings\n        - Bulk inserts documents with vectors\n\n        Args:\n            client: OpenSearch client for performing operations\n        \"\"\"\n        # Convert DataFrame to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            logger.info(f\"[LF] Docs metadata {self.docs_metadata}\")\n            if isinstance(self.docs_metadata[-1], Data):\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                self.docs_metadata = self.docs_metadata[-1].data\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                additional_metadata.update(self.docs_metadata)\n            else:\n                for item in self.docs_metadata:\n                    if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                        additional_metadata[item[\"key\"]] = item[\"value\"]\n        # Replace string \"None\" values with actual None\n        for key, value in additional_metadata.items():\n            if value == \"None\":\n                additional_metadata[key] = None\n        logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, 
doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        self.log(metadatas)\n        if not self.embedding:\n            msg = \"Embedding handle is required to embed documents.\"\n            raise ValueError(msg)\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n        m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            
is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n        \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n        This method accepts two filter formats and converts them to standardized\n        OpenSearch query clauses:\n\n        Format A - Explicit filters:\n        {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n         \"limit\": 10, \"score_threshold\": 1.5}\n\n        Format B - Context-style mapping:\n        {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n        Args:\n            filter_obj: Filter configuration dictionary or None\n\n        Returns:\n            List of OpenSearch filter clauses (term/terms objects)\n            Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n        \"\"\"\n        if not filter_obj:\n            return []\n\n        # If it is a string, try to parse it once\n        if isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except json.JSONDecodeError:\n                # Not valid JSON - treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            explicit_clauses: list[dict] = []\n            for f in raw or []:\n                if \"term\" in f and isinstance(f[\"term\"], dict) and not 
self._is_placeholder_term(f[\"term\"]):\n                    explicit_clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n                    if isinstance(vals, list) and len(vals) > 0:\n                        explicit_clauses.append(f)\n            return explicit_clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        context_clauses: list[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    context_clauses.append({\"term\": {field: values[0]}})\n            else:\n                context_clauses.append({\"terms\": {field: values}})\n        return context_clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def search(self, query: str | None = None) -> list[dict[str, Any]]:\n        \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n        This method executes a sophisticated search that combines:\n        - K-nearest neighbor (KNN) vector similarity search (70% weight)\n        - Multi-field keyword search with fuzzy matching (30% weight)\n        - Optional filtering and score thresholds\n        - Aggregations for faceted search results\n\n        Args:\n            query: Search query string (used for both vector embedding and keyword search)\n\n        Returns:\n    
        List of search results with page_content, metadata, and relevance scores\n\n        Raises:\n            ValueError: If embedding component is not provided or filter JSON is invalid\n        \"\"\"\n        logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                msg = f\"Invalid filter_expression JSON: {e}\"\n                raise ValueError(msg) from e\n\n        if not self.embedding:\n            msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n            raise ValueError(msg)\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n        body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                                    \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                       
         \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": \"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if filter_clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def search_documents(self) -> list[Data]:\n        \"\"\"Search documents and return results as Data objects.\n\n        This is the main interface method that performs the search using the\n        configured 
search_query and returns results in Langflow's Data format.\n\n        Returns:\n            List of Data objects containing search results with text and metadata\n\n        Raises:\n            Exception: If search operation fails\n        \"\"\"\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Dynamically update component configuration based on field changes.\n\n        This method handles real-time UI updates, particularly for authentication\n        mode changes that show/hide relevant input fields.\n\n        Args:\n            build_config: Current component configuration\n            field_value: New value for the changed field\n            field_name: Name of the field that changed\n\n        Returns:\n            Updated build configuration with appropriate field visibility\n        \"\"\"\n        try:\n            if field_name == \"auth_mode\":\n                mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                
build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n                    build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n        except (KeyError, ValueError) as e:\n            self.log(f\"update_build_config error: {e}\")\n\n        return build_config\n"
+                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\nfrom opensearchpy.exceptions import RequestError\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n    This component provides vector storage and retrieval using OpenSearch, combining semantic\n    similarity search (KNN) with keyword-based search for optimal results. It supports document\n    ingestion, vector embeddings, and advanced filtering with authentication options.\n\n    Features:\n    - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n    - Hybrid search combining KNN vector similarity and keyword matching\n    - Flexible authentication (Basic auth, JWT tokens)\n    - Advanced filtering and aggregations\n    - Metadata injection during document ingestion\n    \"\"\"\n\n    display_name: str = \"OpenSearch\"\n    icon: str = \"OpenSearch\"\n    description: str = (\n        \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n    )\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[i.name for i in LCVectorStoreComponent.inputs],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        
\"jwt_header\",\n        \"bearer_prefix\",\n        \"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Document Metadata\",\n            info=(\n                \"Additional metadata key-value pairs to be added to all ingested documents. \"\n                \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n            ),\n            table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                },\n            ],\n            value=[],\n            # advanced=True,\n            input_types=[\"Data\"]\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=(\n                \"The connection URL for your OpenSearch cluster \"\n                \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n            ),\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=(\n                \"The OpenSearch index name where documents will be stored and searched. 
\"\n                \"Will be created automatically if it doesn't exist.\"\n            ),\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Vector Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=(\n                \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n                \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n            ),\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Distance Metric\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=(\n                \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n                \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=(\n                \"Size of the dynamic candidate list during index construction. \"\n                \"Higher values improve recall but increase indexing time and memory usage.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=(\n                \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n                \"Higher values improve search quality but increase memory usage and indexing time.\"\n            ),\n            advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field Name\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Result Limit\",\n            value=10,\n            advanced=True,\n            info=(\n                \"Default maximum number of search results to return when no limit is \"\n                \"specified in the filter expression.\"\n            ),\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Search Filters (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n                \"Format 1 - Explicit filters:\\n\"\n                '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n                '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n                \"Format 2 - Context-style mapping:\\n\"\n                '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n                \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Authentication Mode\",\n          
  value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=(\n                \"Authentication method: 'basic' for username/password authentication, \"\n                \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n            ),\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"password\",\n            display_name=\"OpenSearch Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=False,\n            show=True,\n            info=(\n                \"Valid JSON Web Token for authentication. \"\n                \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n            ),\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(\n            name=\"use_ssl\",\n            display_name=\"Use SSL/TLS\",\n            value=True,\n            advanced=True,\n            info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n        ),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify SSL Certificates\",\n            value=False,\n            advanced=True,\n            info=(\n                \"Verify SSL certificates when connecting. 
\"\n                \"Disable for self-signed certificates in development environments.\"\n            ),\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> dict[str, Any]:\n        \"\"\"Create the default OpenSearch index mapping for vector search.\n\n        This method generates the index configuration with k-NN settings optimized\n        for approximate nearest neighbor search using the specified vector engine.\n\n        Args:\n            dim: Dimensionality of the vector embeddings\n            engine: Vector search engine (jvector, nmslib, faiss, lucene)\n            space_type: Distance metric for similarity calculation\n            ef_search: Size of dynamic list used during search\n            ef_construction: Size of dynamic list used during index construction\n            m: Number of bidirectional links for each vector\n            vector_field: Name of the field storing vector embeddings\n\n        Returns:\n            Dictionary containing OpenSearch index mapping configuration\n        \"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                
    },\n                    \"embedding_dimensions\": {\n                        \"type\": \"integer\"\n                    }\n                }\n            },\n        }\n\n    def _ensure_vector_field_mapping(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        vector_field: str,\n        dim: int,\n        engine: str,\n        space_type: str,\n        ef_construction: int,\n        m: int,\n    ) -> None:\n        \"\"\"Ensure the target vector field exists with the correct mapping.\"\"\"\n        try:\n            mapping = {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                    },\n                    \"embedding_dimensions\": {\n                        \"type\": \"integer\"\n                    }\n                }\n            }\n            client.indices.put_mapping(index=index_name, body=mapping)\n            logger.info(\n                \"Added/updated vector field mapping for %s in index %s\",\n                vector_field,\n                index_name,\n            )\n        except Exception as exc:\n            logger.warning(\n                \"Could not ensure vector field mapping for %s: %s\",\n                vector_field,\n                exc,\n            )\n\n    def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n        Amazon OpenSearch Serverless has restrictions on which vector engines\n        can be used. 
This method ensures the selected engine is compatible.\n\n        Args:\n            is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n            engine: The selected vector search engine\n\n        Raises:\n            ValueError: If AOSS is used with an incompatible engine\n        \"\"\"\n        if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n            msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n            raise ValueError(msg)\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n        Args:\n            http_auth: The HTTP authentication object\n\n        Returns:\n            True if AOSS is enabled, False otherwise\n        \"\"\"\n        return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: list[list[float]],\n        texts: list[str],\n        metadatas: list[dict] | None = None,\n        ids: list[str] | None = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: dict | None = None,\n        max_chunk_bytes: int | None = 1 * 1024 * 1024,\n        *,\n        is_aoss: bool = False,\n    ) -> list[str]:\n        \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n        This method uses bulk operations to insert documents with their vector\n        embeddings and metadata into the specified OpenSearch index.\n\n        Args:\n            client: OpenSearch client instance\n            index_name: Target index for document storage\n            embeddings: List of vector embeddings for each document\n            texts: List of document texts\n            metadatas: Optional metadata dictionaries for each document\n            ids: Optional 
document IDs (UUIDs generated if not provided)\n            vector_field: Field name for storing vector embeddings\n            text_field: Field name for storing document text\n            mapping: Optional index mapping configuration\n            max_chunk_bytes: Maximum size per bulk request chunk\n            is_aoss: Whether using Amazon OpenSearch Serverless\n\n        Returns:\n            List of document IDs that were successfully ingested\n        \"\"\"\n        if not mapping:\n            mapping = {}\n\n        requests = []\n        return_ids = []\n        vector_dimensions = len(embeddings[0]) if embeddings else None\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            if vector_dimensions is not None and \"embedding_dimensions\" not in metadata:\n                metadata = {**metadata, \"embedding_dimensions\": vector_dimensions}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                **metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        if metadatas:\n            self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> dict[str, Any]:\n        \"\"\"Build authentication configuration for OpenSearch client.\n\n        Constructs the appropriate authentication parameters based on the\n        selected auth mode (basic username/password or JWT token).\n\n        Returns:\n            Dictionary containing authentication 
configuration\n\n        Raises:\n            ValueError: If required authentication parameters are missing\n        \"\"\"\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n                raise ValueError(msg)\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            msg = \"Auth Mode is 'basic' but username/password are missing.\"\n            raise ValueError(msg)\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        \"\"\"Create and configure an OpenSearch client instance.\n\n        Returns:\n            Configured OpenSearch client ready for operations\n        \"\"\"\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n        This method handles the complete document ingestion 
pipeline:\n        - Prepares document data and metadata\n        - Generates vector embeddings\n        - Creates appropriate index mappings\n        - Bulk inserts documents with vectors\n\n        Args:\n            client: OpenSearch client for performing operations\n        \"\"\"\n        # Convert DataFrame to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            logger.info(f\"[LF] Docs metadata {self.docs_metadata}\")\n            if isinstance(self.docs_metadata[-1], Data):\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                self.docs_metadata = self.docs_metadata[-1].data\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                additional_metadata.update(self.docs_metadata)\n            else:\n                for item in self.docs_metadata:\n                    if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                        additional_metadata[item[\"key\"]] = item[\"value\"]\n        logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        self.log(metadatas)\n        if not self.embedding:\n            msg = 
\"Embedding handle is required to embed documents.\"\n            raise ValueError(msg)\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n        m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        try:\n            if not client.indices.exists(index=self.index_name):\n                self.log(f\"Creating index '{self.index_name}' with base mapping\")\n                client.indices.create(index=self.index_name, body=mapping)\n        except RequestError as creation_error:\n            if getattr(creation_error, \"error\", \"\") != \"resource_already_exists_exception\":\n                logger.warning(\n                    \"Failed to create index %s: %s\",\n                    self.index_name,\n                    creation_error,\n                )\n\n        self._ensure_vector_field_mapping(\n            client=client,\n            index_name=self.index_name,\n            vector_field=self.vector_field,\n            dim=dim,\n            engine=engine,\n            
space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n        )\n\n        self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n        \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n        This method accepts two filter formats and converts them to standardized\n        OpenSearch query clauses:\n\n        Format A - Explicit filters:\n        {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n         \"limit\": 10, \"score_threshold\": 1.5}\n\n        Format B - Context-style mapping:\n        {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n        Args:\n            filter_obj: Filter configuration dictionary or None\n\n        Returns:\n            List of OpenSearch filter clauses (term/terms objects)\n            Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n        \"\"\"\n        if not filter_obj:\n            return []\n\n        # If it is a string, try to parse it once\n        if 
isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except json.JSONDecodeError:\n                # Not valid JSON - treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            explicit_clauses: list[dict] = []\n            for f in raw or []:\n                if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n                    explicit_clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n                    if isinstance(vals, list) and len(vals) > 0:\n                        explicit_clauses.append(f)\n            return explicit_clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        context_clauses: list[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    context_clauses.append({\"term\": {field: values[0]}})\n            else:\n                context_clauses.append({\"terms\": {field: values}})\n        return context_clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def 
search(self, query: str | None = None) -> list[dict[str, Any]]:\n        \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n        This method executes a sophisticated search that combines:\n        - K-nearest neighbor (KNN) vector similarity search (70% weight)\n        - Multi-field keyword search with fuzzy matching (30% weight)\n        - Optional filtering and score thresholds\n        - Aggregations for faceted search results\n\n        Args:\n            query: Search query string (used for both vector embedding and keyword search)\n\n        Returns:\n            List of search results with page_content, metadata, and relevance scores\n\n        Raises:\n            ValueError: If embedding component is not provided or filter JSON is invalid\n        \"\"\"\n        logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                msg = f\"Invalid filter_expression JSON: {e}\"\n                raise ValueError(msg) from e\n\n        if not self.embedding:\n            msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n            raise ValueError(msg)\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n 
       body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                                    \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                                \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": \"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"embedding_dimensions\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if filter_clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = 
score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def search_documents(self) -> list[Data]:\n        \"\"\"Search documents and return results as Data objects.\n\n        This is the main interface method that performs the search using the\n        configured search_query and returns results in Langflow's Data format.\n\n        Returns:\n            List of Data objects containing search results with text and metadata\n\n        Raises:\n            Exception: If search operation fails\n        \"\"\"\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Dynamically update component configuration based on field changes.\n\n        This method handles real-time UI updates, particularly for authentication\n        mode changes that show/hide relevant input fields.\n\n        Args:\n            build_config: Current component configuration\n            field_value: New value for the changed field\n            field_name: Name of the field that changed\n\n        Returns:\n            Updated build configuration with appropriate field visibility\n        \"\"\"\n        try:\n            if field_name == \"auth_mode\":\n               
 mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n                    build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n        except (KeyError, ValueError) as e:\n            self.log(f\"update_build_config error: {e}\")\n\n        return build_config\n"
               },
               "docs_metadata": {
                 "_input_type": "TableInput",
@@ -2807,4 +2807,4 @@
     "rag",
     "q-a"
   ]
-}
\ No newline at end of file
+}
diff --git a/flows/openrag_nudges.json b/flows/openrag_nudges.json
index 7ed390d7..432f72c9 100644
--- a/flows/openrag_nudges.json
+++ b/flows/openrag_nudges.json
@@ -871,7 +871,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n    This component provides vector storage and retrieval using OpenSearch, combining semantic\n    similarity search (KNN) with keyword-based search for optimal results. It supports document\n    ingestion, vector embeddings, and advanced filtering with authentication options.\n\n    Features:\n    - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n    - Hybrid search combining KNN vector similarity and keyword matching\n    - Flexible authentication (Basic auth, JWT tokens)\n    - Advanced filtering and aggregations\n    - Metadata injection during document ingestion\n    \"\"\"\n\n    display_name: str = \"OpenSearch\"\n    icon: str = \"OpenSearch\"\n    description: str = (\n        \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n    )\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[i.name for i in LCVectorStoreComponent.inputs],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        \"jwt_header\",\n        \"bearer_prefix\",\n        
\"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Document Metadata\",\n            info=(\n                \"Additional metadata key-value pairs to be added to all ingested documents. \"\n                \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n            ),\n            table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                },\n            ],\n            value=[],\n            # advanced=True,\n            input_types=[\"Data\"]\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=(\n                \"The connection URL for your OpenSearch cluster \"\n                \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n            ),\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=(\n                \"The OpenSearch index name where documents will be stored and searched. 
\"\n                \"Will be created automatically if it doesn't exist.\"\n            ),\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Vector Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=(\n                \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n                \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n            ),\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Distance Metric\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=(\n                \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n                \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=(\n                \"Size of the dynamic candidate list during index construction. \"\n                \"Higher values improve recall but increase indexing time and memory usage.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=(\n                \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n                \"Higher values improve search quality but increase memory usage and indexing time.\"\n            ),\n            advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field Name\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Result Limit\",\n            value=10,\n            advanced=True,\n            info=(\n                \"Default maximum number of search results to return when no limit is \"\n                \"specified in the filter expression.\"\n            ),\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Search Filters (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n                \"Format 1 - Explicit filters:\\n\"\n                '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n                '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n                \"Format 2 - Context-style mapping:\\n\"\n                '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n                \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Authentication Mode\",\n          
  value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=(\n                \"Authentication method: 'basic' for username/password authentication, \"\n                \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n            ),\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"password\",\n            display_name=\"OpenSearch Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=False,\n            show=True,\n            info=(\n                \"Valid JSON Web Token for authentication. \"\n                \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n            ),\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(\n            name=\"use_ssl\",\n            display_name=\"Use SSL/TLS\",\n            value=True,\n            advanced=True,\n            info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n        ),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify SSL Certificates\",\n            value=False,\n            advanced=True,\n            info=(\n                \"Verify SSL certificates when connecting. 
\"\n                \"Disable for self-signed certificates in development environments.\"\n            ),\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> dict[str, Any]:\n        \"\"\"Create the default OpenSearch index mapping for vector search.\n\n        This method generates the index configuration with k-NN settings optimized\n        for approximate nearest neighbor search using the specified vector engine.\n\n        Args:\n            dim: Dimensionality of the vector embeddings\n            engine: Vector search engine (jvector, nmslib, faiss, lucene)\n            space_type: Distance metric for similarity calculation\n            ef_search: Size of dynamic list used during search\n            ef_construction: Size of dynamic list used during index construction\n            m: Number of bidirectional links for each vector\n            vector_field: Name of the field storing vector embeddings\n\n        Returns:\n            Dictionary containing OpenSearch index mapping configuration\n        \"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                
    }\n                }\n            },\n        }\n\n    def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n        Amazon OpenSearch Serverless has restrictions on which vector engines\n        can be used. This method ensures the selected engine is compatible.\n\n        Args:\n            is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n            engine: The selected vector search engine\n\n        Raises:\n            ValueError: If AOSS is used with an incompatible engine\n        \"\"\"\n        if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n            msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n            raise ValueError(msg)\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n        Args:\n            http_auth: The HTTP authentication object\n\n        Returns:\n            True if AOSS is enabled, False otherwise\n        \"\"\"\n        return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: list[list[float]],\n        texts: list[str],\n        metadatas: list[dict] | None = None,\n        ids: list[str] | None = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: dict | None = None,\n        max_chunk_bytes: int | None = 1 * 1024 * 1024,\n        *,\n        is_aoss: bool = False,\n    ) -> list[str]:\n        \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n        This method uses bulk operations to insert documents with their vector\n        embeddings and metadata into the specified OpenSearch index.\n\n        
Args:\n            client: OpenSearch client instance\n            index_name: Target index for document storage\n            embeddings: List of vector embeddings for each document\n            texts: List of document texts\n            metadatas: Optional metadata dictionaries for each document\n            ids: Optional document IDs (UUIDs generated if not provided)\n            vector_field: Field name for storing vector embeddings\n            text_field: Field name for storing document text\n            mapping: Optional index mapping configuration\n            max_chunk_bytes: Maximum size per bulk request chunk\n            is_aoss: Whether using Amazon OpenSearch Serverless\n\n        Returns:\n            List of document IDs that were successfully ingested\n        \"\"\"\n        if not mapping:\n            mapping = {}\n\n        requests = []\n        return_ids = []\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                **metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        if metadatas:\n            self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> dict[str, Any]:\n        \"\"\"Build authentication configuration for OpenSearch client.\n\n        Constructs the appropriate authentication parameters based on the\n        selected auth mode (basic username/password or JWT 
token).\n\n        Returns:\n            Dictionary containing authentication configuration\n\n        Raises:\n            ValueError: If required authentication parameters are missing\n        \"\"\"\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n                raise ValueError(msg)\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            msg = \"Auth Mode is 'basic' but username/password are missing.\"\n            raise ValueError(msg)\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        \"\"\"Create and configure an OpenSearch client instance.\n\n        Returns:\n            Configured OpenSearch client ready for operations\n        \"\"\"\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        \"\"\"Process and ingest documents into the OpenSearch 
vector store.\n\n        This method handles the complete document ingestion pipeline:\n        - Prepares document data and metadata\n        - Generates vector embeddings\n        - Creates appropriate index mappings\n        - Bulk inserts documents with vectors\n\n        Args:\n            client: OpenSearch client for performing operations\n        \"\"\"\n        # Convert DataFrame to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            logger.info(f\"[LF] Docs metadata {self.docs_metadata}\")\n            if isinstance(self.docs_metadata[-1], Data):\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                self.docs_metadata = self.docs_metadata[-1].data\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                additional_metadata.update(self.docs_metadata)\n            else:\n                for item in self.docs_metadata:\n                    if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                        additional_metadata[item[\"key\"]] = item[\"value\"]\n        # Replace string \"None\" values with actual None\n        for key, value in additional_metadata.items():\n            if value == \"None\":\n                additional_metadata[key] = None\n        logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, 
doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        self.log(metadatas)\n        if not self.embedding:\n            msg = \"Embedding handle is required to embed documents.\"\n            raise ValueError(msg)\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n        m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            
is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n        \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n        This method accepts two filter formats and converts them to standardized\n        OpenSearch query clauses:\n\n        Format A - Explicit filters:\n        {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n         \"limit\": 10, \"score_threshold\": 1.5}\n\n        Format B - Context-style mapping:\n        {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n        Args:\n            filter_obj: Filter configuration dictionary or None\n\n        Returns:\n            List of OpenSearch filter clauses (term/terms objects)\n            Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n        \"\"\"\n        if not filter_obj:\n            return []\n\n        # If it is a string, try to parse it once\n        if isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except json.JSONDecodeError:\n                # Not valid JSON - treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            explicit_clauses: list[dict] = []\n            for f in raw or []:\n                if \"term\" in f and isinstance(f[\"term\"], dict) and not 
self._is_placeholder_term(f[\"term\"]):\n                    explicit_clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n                    if isinstance(vals, list) and len(vals) > 0:\n                        explicit_clauses.append(f)\n            return explicit_clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        context_clauses: list[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    context_clauses.append({\"term\": {field: values[0]}})\n            else:\n                context_clauses.append({\"terms\": {field: values}})\n        return context_clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def search(self, query: str | None = None) -> list[dict[str, Any]]:\n        \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n        This method executes a sophisticated search that combines:\n        - K-nearest neighbor (KNN) vector similarity search (70% weight)\n        - Multi-field keyword search with fuzzy matching (30% weight)\n        - Optional filtering and score thresholds\n        - Aggregations for faceted search results\n\n        Args:\n            query: Search query string (used for both vector embedding and keyword search)\n\n        Returns:\n    
        List of search results with page_content, metadata, and relevance scores\n\n        Raises:\n            ValueError: If embedding component is not provided or filter JSON is invalid\n        \"\"\"\n        logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                msg = f\"Invalid filter_expression JSON: {e}\"\n                raise ValueError(msg) from e\n\n        if not self.embedding:\n            msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n            raise ValueError(msg)\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n        body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                                    \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                       
         \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": \"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if filter_clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def search_documents(self) -> list[Data]:\n        \"\"\"Search documents and return results as Data objects.\n\n        This is the main interface method that performs the search using the\n        configured 
search_query and returns results in Langflow's Data format.\n\n        Returns:\n            List of Data objects containing search results with text and metadata\n\n        Raises:\n            Exception: If search operation fails\n        \"\"\"\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Dynamically update component configuration based on field changes.\n\n        This method handles real-time UI updates, particularly for authentication\n        mode changes that show/hide relevant input fields.\n\n        Args:\n            build_config: Current component configuration\n            field_value: New value for the changed field\n            field_name: Name of the field that changed\n\n        Returns:\n            Updated build configuration with appropriate field visibility\n        \"\"\"\n        try:\n            if field_name == \"auth_mode\":\n                mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                
build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n                    build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n        except (KeyError, ValueError) as e:\n            self.log(f\"update_build_config error: {e}\")\n\n        return build_config\n"
+                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\nfrom opensearchpy.exceptions import RequestError\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n    This component provides vector storage and retrieval using OpenSearch, combining semantic\n    similarity search (KNN) with keyword-based search for optimal results. It supports document\n    ingestion, vector embeddings, and advanced filtering with authentication options.\n\n    Features:\n    - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n    - Hybrid search combining KNN vector similarity and keyword matching\n    - Flexible authentication (Basic auth, JWT tokens)\n    - Advanced filtering and aggregations\n    - Metadata injection during document ingestion\n    \"\"\"\n\n    display_name: str = \"OpenSearch\"\n    icon: str = \"OpenSearch\"\n    description: str = (\n        \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n    )\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[i.name for i in LCVectorStoreComponent.inputs],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        
\"jwt_header\",\n        \"bearer_prefix\",\n        \"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Document Metadata\",\n            info=(\n                \"Additional metadata key-value pairs to be added to all ingested documents. \"\n                \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n            ),\n            table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                },\n            ],\n            value=[],\n            # advanced=True,\n            input_types=[\"Data\"]\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=(\n                \"The connection URL for your OpenSearch cluster \"\n                \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n            ),\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=(\n                \"The OpenSearch index name where documents will be stored and searched. 
\"\n                \"Will be created automatically if it doesn't exist.\"\n            ),\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Vector Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=(\n                \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n                \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n            ),\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Distance Metric\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=(\n                \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n                \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=(\n                \"Size of the dynamic candidate list during index construction. \"\n                \"Higher values improve recall but increase indexing time and memory usage.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=(\n                \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n                \"Higher values improve search quality but increase memory usage and indexing time.\"\n            ),\n            advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field Name\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Result Limit\",\n            value=10,\n            advanced=True,\n            info=(\n                \"Default maximum number of search results to return when no limit is \"\n                \"specified in the filter expression.\"\n            ),\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Search Filters (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n                \"Format 1 - Explicit filters:\\n\"\n                '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n                '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n                \"Format 2 - Context-style mapping:\\n\"\n                '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n                \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Authentication Mode\",\n          
  value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=(\n                \"Authentication method: 'basic' for username/password authentication, \"\n                \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n            ),\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"password\",\n            display_name=\"OpenSearch Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=False,\n            show=True,\n            info=(\n                \"Valid JSON Web Token for authentication. \"\n                \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n            ),\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(\n            name=\"use_ssl\",\n            display_name=\"Use SSL/TLS\",\n            value=True,\n            advanced=True,\n            info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n        ),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify SSL Certificates\",\n            value=False,\n            advanced=True,\n            info=(\n                \"Verify SSL certificates when connecting. 
\"\n                \"Disable for self-signed certificates in development environments.\"\n            ),\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> dict[str, Any]:\n        \"\"\"Create the default OpenSearch index mapping for vector search.\n\n        This method generates the index configuration with k-NN settings optimized\n        for approximate nearest neighbor search using the specified vector engine.\n\n        Args:\n            dim: Dimensionality of the vector embeddings\n            engine: Vector search engine (jvector, nmslib, faiss, lucene)\n            space_type: Distance metric for similarity calculation\n            ef_search: Size of dynamic list used during search\n            ef_construction: Size of dynamic list used during index construction\n            m: Number of bidirectional links for each vector\n            vector_field: Name of the field storing vector embeddings\n\n        Returns:\n            Dictionary containing OpenSearch index mapping configuration\n        \"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                
    },\n                    \"embedding_dimensions\": {\n                        \"type\": \"integer\"\n                    }\n                }\n            },\n        }\n\n    def _ensure_vector_field_mapping(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        vector_field: str,\n        dim: int,\n        engine: str,\n        space_type: str,\n        ef_construction: int,\n        m: int,\n    ) -> None:\n        \"\"\"Ensure the target vector field exists with the correct mapping.\"\"\"\n        try:\n            mapping = {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                    },\n                    \"embedding_dimensions\": {\n                        \"type\": \"integer\"\n                    }\n                }\n            }\n            client.indices.put_mapping(index=index_name, body=mapping)\n            logger.info(\n                \"Added/updated vector field mapping for %s in index %s\",\n                vector_field,\n                index_name,\n            )\n        except Exception as exc:\n            logger.warning(\n                \"Could not ensure vector field mapping for %s: %s\",\n                vector_field,\n                exc,\n            )\n\n    def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n        Amazon OpenSearch Serverless has restrictions on which vector engines\n        can be used. 
This method ensures the selected engine is compatible.\n\n        Args:\n            is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n            engine: The selected vector search engine\n\n        Raises:\n            ValueError: If AOSS is used with an incompatible engine\n        \"\"\"\n        if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n            msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n            raise ValueError(msg)\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n        Args:\n            http_auth: The HTTP authentication object\n\n        Returns:\n            True if AOSS is enabled, False otherwise\n        \"\"\"\n        return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: list[list[float]],\n        texts: list[str],\n        metadatas: list[dict] | None = None,\n        ids: list[str] | None = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: dict | None = None,\n        max_chunk_bytes: int | None = 1 * 1024 * 1024,\n        *,\n        is_aoss: bool = False,\n    ) -> list[str]:\n        \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n        This method uses bulk operations to insert documents with their vector\n        embeddings and metadata into the specified OpenSearch index.\n\n        Args:\n            client: OpenSearch client instance\n            index_name: Target index for document storage\n            embeddings: List of vector embeddings for each document\n            texts: List of document texts\n            metadatas: Optional metadata dictionaries for each document\n            ids: Optional 
document IDs (UUIDs generated if not provided)\n            vector_field: Field name for storing vector embeddings\n            text_field: Field name for storing document text\n            mapping: Optional index mapping configuration\n            max_chunk_bytes: Maximum size per bulk request chunk\n            is_aoss: Whether using Amazon OpenSearch Serverless\n\n        Returns:\n            List of document IDs that were successfully ingested\n        \"\"\"\n        if not mapping:\n            mapping = {}\n\n        requests = []\n        return_ids = []\n        vector_dimensions = len(embeddings[0]) if embeddings else None\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            if vector_dimensions is not None and \"embedding_dimensions\" not in metadata:\n                metadata = {**metadata, \"embedding_dimensions\": vector_dimensions}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                **metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        if metadatas:\n            self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> dict[str, Any]:\n        \"\"\"Build authentication configuration for OpenSearch client.\n\n        Constructs the appropriate authentication parameters based on the\n        selected auth mode (basic username/password or JWT token).\n\n        Returns:\n            Dictionary containing authentication 
configuration\n\n        Raises:\n            ValueError: If required authentication parameters are missing\n        \"\"\"\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n                raise ValueError(msg)\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            msg = \"Auth Mode is 'basic' but username/password are missing.\"\n            raise ValueError(msg)\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        \"\"\"Create and configure an OpenSearch client instance.\n\n        Returns:\n            Configured OpenSearch client ready for operations\n        \"\"\"\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n        This method handles the complete document ingestion 
pipeline:\n        - Prepares document data and metadata\n        - Generates vector embeddings\n        - Creates appropriate index mappings\n        - Bulk inserts documents with vectors\n\n        Args:\n            client: OpenSearch client for performing operations\n        \"\"\"\n        # Convert DataFrame to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            logger.info(f\"[LF] Docs metadata {self.docs_metadata}\")\n            if isinstance(self.docs_metadata[-1], Data):\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                self.docs_metadata = self.docs_metadata[-1].data\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                additional_metadata.update(self.docs_metadata)\n            else:\n                for item in self.docs_metadata:\n                    if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                        additional_metadata[item[\"key\"]] = item[\"value\"]\n        logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        self.log(metadatas)\n        if not self.embedding:\n            msg = 
\"Embedding handle is required to embed documents.\"\n            raise ValueError(msg)\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n        m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        try:\n            if not client.indices.exists(index=self.index_name):\n                self.log(f\"Creating index '{self.index_name}' with base mapping\")\n                client.indices.create(index=self.index_name, body=mapping)\n        except RequestError as creation_error:\n            if getattr(creation_error, \"error\", \"\") != \"resource_already_exists_exception\":\n                logger.warning(\n                    \"Failed to create index %s: %s\",\n                    self.index_name,\n                    creation_error,\n                )\n\n        self._ensure_vector_field_mapping(\n            client=client,\n            index_name=self.index_name,\n            vector_field=self.vector_field,\n            dim=dim,\n            engine=engine,\n            
space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n        )\n\n        self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n        \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n        This method accepts two filter formats and converts them to standardized\n        OpenSearch query clauses:\n\n        Format A - Explicit filters:\n        {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n         \"limit\": 10, \"score_threshold\": 1.5}\n\n        Format B - Context-style mapping:\n        {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n        Args:\n            filter_obj: Filter configuration dictionary or None\n\n        Returns:\n            List of OpenSearch filter clauses (term/terms objects)\n            Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n        \"\"\"\n        if not filter_obj:\n            return []\n\n        # If it is a string, try to parse it once\n        if 
isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except json.JSONDecodeError:\n                # Not valid JSON - treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            explicit_clauses: list[dict] = []\n            for f in raw or []:\n                if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n                    explicit_clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n                    if isinstance(vals, list) and len(vals) > 0:\n                        explicit_clauses.append(f)\n            return explicit_clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        context_clauses: list[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    context_clauses.append({\"term\": {field: values[0]}})\n            else:\n                context_clauses.append({\"terms\": {field: values}})\n        return context_clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def 
search(self, query: str | None = None) -> list[dict[str, Any]]:\n        \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n        This method executes a sophisticated search that combines:\n        - K-nearest neighbor (KNN) vector similarity search (70% weight)\n        - Multi-field keyword search with fuzzy matching (30% weight)\n        - Optional filtering and score thresholds\n        - Aggregations for faceted search results\n\n        Args:\n            query: Search query string (used for both vector embedding and keyword search)\n\n        Returns:\n            List of search results with page_content, metadata, and relevance scores\n\n        Raises:\n            ValueError: If embedding component is not provided or filter JSON is invalid\n        \"\"\"\n        logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                msg = f\"Invalid filter_expression JSON: {e}\"\n                raise ValueError(msg) from e\n\n        if not self.embedding:\n            msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n            raise ValueError(msg)\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n 
       body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                                    \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                                \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": \"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"embedding_dimensions\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if filter_clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = 
score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def search_documents(self) -> list[Data]:\n        \"\"\"Search documents and return results as Data objects.\n\n        This is the main interface method that performs the search using the\n        configured search_query and returns results in Langflow's Data format.\n\n        Returns:\n            List of Data objects containing search results with text and metadata\n\n        Raises:\n            Exception: If search operation fails\n        \"\"\"\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Dynamically update component configuration based on field changes.\n\n        This method handles real-time UI updates, particularly for authentication\n        mode changes that show/hide relevant input fields.\n\n        Args:\n            build_config: Current component configuration\n            field_value: New value for the changed field\n            field_name: Name of the field that changed\n\n        Returns:\n            Updated build configuration with appropriate field visibility\n        \"\"\"\n        try:\n            if field_name == \"auth_mode\":\n               
 mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n                    build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n        except (KeyError, ValueError) as e:\n            self.log(f\"update_build_config error: {e}\")\n\n        return build_config\n"
               },
               "docs_metadata": {
                 "_input_type": "TableInput",
@@ -2347,4 +2347,4 @@
     "assistants",
     "agents"
   ]
-}
\ No newline at end of file
+}
diff --git a/flows/openrag_url_mcp.json b/flows/openrag_url_mcp.json
index 9cab0fed..672c0d65 100644
--- a/flows/openrag_url_mcp.json
+++ b/flows/openrag_url_mcp.json
@@ -681,7 +681,7 @@
                 "show": true,
                 "title_case": false,
                 "type": "code",
-                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n    This component provides vector storage and retrieval using OpenSearch, combining semantic\n    similarity search (KNN) with keyword-based search for optimal results. It supports document\n    ingestion, vector embeddings, and advanced filtering with authentication options.\n\n    Features:\n    - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n    - Hybrid search combining KNN vector similarity and keyword matching\n    - Flexible authentication (Basic auth, JWT tokens)\n    - Advanced filtering and aggregations\n    - Metadata injection during document ingestion\n    \"\"\"\n\n    display_name: str = \"OpenSearch\"\n    icon: str = \"OpenSearch\"\n    description: str = (\n        \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n    )\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[i.name for i in LCVectorStoreComponent.inputs],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        \"jwt_header\",\n        \"bearer_prefix\",\n        
\"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Document Metadata\",\n            info=(\n                \"Additional metadata key-value pairs to be added to all ingested documents. \"\n                \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n            ),\n            table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                     \"load_from_db\": True\n                },\n            ],\n            value=[],\n            # advanced=True,\n            input_types=[\"Data\"]\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=(\n                \"The connection URL for your OpenSearch cluster \"\n                \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n            ),\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=(\n                \"The OpenSearch index name where documents will be stored and searched. 
\"\n                \"Will be created automatically if it doesn't exist.\"\n            ),\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Vector Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=(\n                \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n                \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n            ),\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Distance Metric\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=(\n                \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n                \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=(\n                \"Size of the dynamic candidate list during index construction. \"\n                \"Higher values improve recall but increase indexing time and memory usage.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=(\n                \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n                \"Higher values improve search quality but increase memory usage and indexing time.\"\n            ),\n            advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field Name\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Result Limit\",\n            value=10,\n            advanced=True,\n            info=(\n                \"Default maximum number of search results to return when no limit is \"\n                \"specified in the filter expression.\"\n            ),\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Search Filters (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n                \"Format 1 - Explicit filters:\\n\"\n                '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n                '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n                \"Format 2 - Context-style mapping:\\n\"\n                '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n                \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Authentication Mode\",\n          
  value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=(\n                \"Authentication method: 'basic' for username/password authentication, \"\n                \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n            ),\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"password\",\n            display_name=\"OpenSearch Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=False,\n            show=True,\n            info=(\n                \"Valid JSON Web Token for authentication. \"\n                \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n            ),\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(\n            name=\"use_ssl\",\n            display_name=\"Use SSL/TLS\",\n            value=True,\n            advanced=True,\n            info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n        ),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify SSL Certificates\",\n            value=False,\n            advanced=True,\n            info=(\n                \"Verify SSL certificates when connecting. 
\"\n                \"Disable for self-signed certificates in development environments.\"\n            ),\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> dict[str, Any]:\n        \"\"\"Create the default OpenSearch index mapping for vector search.\n\n        This method generates the index configuration with k-NN settings optimized\n        for approximate nearest neighbor search using the specified vector engine.\n\n        Args:\n            dim: Dimensionality of the vector embeddings\n            engine: Vector search engine (jvector, nmslib, faiss, lucene)\n            space_type: Distance metric for similarity calculation\n            ef_search: Size of dynamic list used during search\n            ef_construction: Size of dynamic list used during index construction\n            m: Number of bidirectional links for each vector\n            vector_field: Name of the field storing vector embeddings\n\n        Returns:\n            Dictionary containing OpenSearch index mapping configuration\n        \"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                
    }\n                }\n            },\n        }\n\n    def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n        Amazon OpenSearch Serverless has restrictions on which vector engines\n        can be used. This method ensures the selected engine is compatible.\n\n        Args:\n            is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n            engine: The selected vector search engine\n\n        Raises:\n            ValueError: If AOSS is used with an incompatible engine\n        \"\"\"\n        if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n            msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n            raise ValueError(msg)\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n        Args:\n            http_auth: The HTTP authentication object\n\n        Returns:\n            True if AOSS is enabled, False otherwise\n        \"\"\"\n        return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: list[list[float]],\n        texts: list[str],\n        metadatas: list[dict] | None = None,\n        ids: list[str] | None = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: dict | None = None,\n        max_chunk_bytes: int | None = 1 * 1024 * 1024,\n        *,\n        is_aoss: bool = False,\n    ) -> list[str]:\n        \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n        This method uses bulk operations to insert documents with their vector\n        embeddings and metadata into the specified OpenSearch index.\n\n        
Args:\n            client: OpenSearch client instance\n            index_name: Target index for document storage\n            embeddings: List of vector embeddings for each document\n            texts: List of document texts\n            metadatas: Optional metadata dictionaries for each document\n            ids: Optional document IDs (UUIDs generated if not provided)\n            vector_field: Field name for storing vector embeddings\n            text_field: Field name for storing document text\n            mapping: Optional index mapping configuration\n            max_chunk_bytes: Maximum size per bulk request chunk\n            is_aoss: Whether using Amazon OpenSearch Serverless\n\n        Returns:\n            List of document IDs that were successfully ingested\n        \"\"\"\n        if not mapping:\n            mapping = {}\n\n        requests = []\n        return_ids = []\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                **metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        if metadatas:\n            self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> dict[str, Any]:\n        \"\"\"Build authentication configuration for OpenSearch client.\n\n        Constructs the appropriate authentication parameters based on the\n        selected auth mode (basic username/password or JWT 
token).\n\n        Returns:\n            Dictionary containing authentication configuration\n\n        Raises:\n            ValueError: If required authentication parameters are missing\n        \"\"\"\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n                raise ValueError(msg)\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            msg = \"Auth Mode is 'basic' but username/password are missing.\"\n            raise ValueError(msg)\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        \"\"\"Create and configure an OpenSearch client instance.\n\n        Returns:\n            Configured OpenSearch client ready for operations\n        \"\"\"\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        \"\"\"Process and ingest documents into the OpenSearch 
vector store.\n\n        This method handles the complete document ingestion pipeline:\n        - Prepares document data and metadata\n        - Generates vector embeddings\n        - Creates appropriate index mappings\n        - Bulk inserts documents with vectors\n\n        Args:\n            client: OpenSearch client for performing operations\n        \"\"\"\n        # Convert DataFrame to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            logger.info(f\"[LF] Docs metadata {self.docs_metadata}\")\n            if isinstance(self.docs_metadata[-1], Data):\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                self.docs_metadata = self.docs_metadata[-1].data\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                additional_metadata.update(self.docs_metadata)\n            else:\n                for item in self.docs_metadata:\n                    if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                        additional_metadata[item[\"key\"]] = item[\"value\"]\n        logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        
self.log(metadatas)\n        if not self.embedding:\n            msg = \"Embedding handle is required to embed documents.\"\n            raise ValueError(msg)\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n        m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: 
dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n        \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n        This method accepts two filter formats and converts them to standardized\n        OpenSearch query clauses:\n\n        Format A - Explicit filters:\n        {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n         \"limit\": 10, \"score_threshold\": 1.5}\n\n        Format B - Context-style mapping:\n        {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n        Args:\n            filter_obj: Filter configuration dictionary or None\n\n        Returns:\n            List of OpenSearch filter clauses (term/terms objects)\n            Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n        \"\"\"\n        if not filter_obj:\n            return []\n\n        # If it is a string, try to parse it once\n        if isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except json.JSONDecodeError:\n                # Not valid JSON - treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            explicit_clauses: list[dict] = []\n            for f in raw or []:\n                if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n                    explicit_clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n         
           if isinstance(vals, list) and len(vals) > 0:\n                        explicit_clauses.append(f)\n            return explicit_clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        context_clauses: list[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    context_clauses.append({\"term\": {field: values[0]}})\n            else:\n                context_clauses.append({\"terms\": {field: values}})\n        return context_clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def search(self, query: str | None = None) -> list[dict[str, Any]]:\n        \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n        This method executes a sophisticated search that combines:\n        - K-nearest neighbor (KNN) vector similarity search (70% weight)\n        - Multi-field keyword search with fuzzy matching (30% weight)\n        - Optional filtering and score thresholds\n        - Aggregations for faceted search results\n\n        Args:\n            query: Search query string (used for both vector embedding and keyword search)\n\n        Returns:\n            List of search results with page_content, metadata, and relevance scores\n\n        Raises:\n            ValueError: If embedding component is not provided or filter JSON is invalid\n        \"\"\"\n        
logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                msg = f\"Invalid filter_expression JSON: {e}\"\n                raise ValueError(msg) from e\n\n        if not self.embedding:\n            msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n            raise ValueError(msg)\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n        body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                                    \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                                \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": 
\"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if filter_clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def search_documents(self) -> list[Data]:\n        \"\"\"Search documents and return results as Data objects.\n\n        This is the main interface method that performs the search using the\n        configured search_query and returns results in Langflow's Data format.\n\n        Returns:\n            List of Data objects containing search results with text and metadata\n\n        Raises:\n            Exception: If 
search operation fails\n        \"\"\"\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Dynamically update component configuration based on field changes.\n\n        This method handles real-time UI updates, particularly for authentication\n        mode changes that show/hide relevant input fields.\n\n        Args:\n            build_config: Current component configuration\n            field_value: New value for the changed field\n            field_name: Name of the field that changed\n\n        Returns:\n            Updated build configuration with appropriate field visibility\n        \"\"\"\n        try:\n            if field_name == \"auth_mode\":\n                mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n               
     build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n        except (KeyError, ValueError) as e:\n            self.log(f\"update_build_config error: {e}\")\n\n        return build_config\n"
+                "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\nfrom opensearchpy.exceptions import RequestError\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n    \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n    This component provides vector storage and retrieval using OpenSearch, combining semantic\n    similarity search (KNN) with keyword-based search for optimal results. It supports document\n    ingestion, vector embeddings, and advanced filtering with authentication options.\n\n    Features:\n    - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n    - Hybrid search combining KNN vector similarity and keyword matching\n    - Flexible authentication (Basic auth, JWT tokens)\n    - Advanced filtering and aggregations\n    - Metadata injection during document ingestion\n    \"\"\"\n\n    display_name: str = \"OpenSearch\"\n    icon: str = \"OpenSearch\"\n    description: str = (\n        \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n    )\n\n    # Keys we consider baseline\n    default_keys: list[str] = [\n        \"opensearch_url\",\n        \"index_name\",\n        *[i.name for i in LCVectorStoreComponent.inputs],  # search_query, add_documents, etc.\n        \"embedding\",\n        \"vector_field\",\n        \"number_of_results\",\n        \"auth_mode\",\n        \"username\",\n        \"password\",\n        \"jwt_token\",\n        
\"jwt_header\",\n        \"bearer_prefix\",\n        \"use_ssl\",\n        \"verify_certs\",\n        \"filter_expression\",\n        \"engine\",\n        \"space_type\",\n        \"ef_construction\",\n        \"m\",\n        \"docs_metadata\",\n    ]\n\n    inputs = [\n        TableInput(\n            name=\"docs_metadata\",\n            display_name=\"Document Metadata\",\n            info=(\n                \"Additional metadata key-value pairs to be added to all ingested documents. \"\n                \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n            ),\n            table_schema=[\n                {\n                    \"name\": \"key\",\n                    \"display_name\": \"Key\",\n                    \"type\": \"str\",\n                    \"description\": \"Key name\",\n                },\n                {\n                    \"name\": \"value\",\n                    \"display_name\": \"Value\",\n                    \"type\": \"str\",\n                    \"description\": \"Value of the metadata\",\n                },\n            ],\n            value=[],\n            # advanced=True,\n            input_types=[\"Data\"]\n        ),\n        StrInput(\n            name=\"opensearch_url\",\n            display_name=\"OpenSearch URL\",\n            value=\"http://localhost:9200\",\n            info=(\n                \"The connection URL for your OpenSearch cluster \"\n                \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n            ),\n        ),\n        StrInput(\n            name=\"index_name\",\n            display_name=\"Index Name\",\n            value=\"langflow\",\n            info=(\n                \"The OpenSearch index name where documents will be stored and searched. 
\"\n                \"Will be created automatically if it doesn't exist.\"\n            ),\n        ),\n        DropdownInput(\n            name=\"engine\",\n            display_name=\"Vector Engine\",\n            options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n            value=\"jvector\",\n            info=(\n                \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n                \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n            ),\n            advanced=True,\n        ),\n        DropdownInput(\n            name=\"space_type\",\n            display_name=\"Distance Metric\",\n            options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n            value=\"l2\",\n            info=(\n                \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n                \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"ef_construction\",\n            display_name=\"EF Construction\",\n            value=512,\n            info=(\n                \"Size of the dynamic candidate list during index construction. \"\n                \"Higher values improve recall but increase indexing time and memory usage.\"\n            ),\n            advanced=True,\n        ),\n        IntInput(\n            name=\"m\",\n            display_name=\"M Parameter\",\n            value=16,\n            info=(\n                \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n                \"Higher values improve search quality but increase memory usage and indexing time.\"\n            ),\n            advanced=True,\n        ),\n        *LCVectorStoreComponent.inputs,  # includes search_query, add_documents, etc.\n        HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n        StrInput(\n            name=\"vector_field\",\n            display_name=\"Vector Field Name\",\n            value=\"chunk_embedding\",\n            advanced=True,\n            info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n        ),\n        IntInput(\n            name=\"number_of_results\",\n            display_name=\"Default Result Limit\",\n            value=10,\n            advanced=True,\n            info=(\n                \"Default maximum number of search results to return when no limit is \"\n                \"specified in the filter expression.\"\n            ),\n        ),\n        MultilineInput(\n            name=\"filter_expression\",\n            display_name=\"Search Filters (JSON)\",\n            value=\"\",\n            info=(\n                \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n                \"Format 1 - Explicit filters:\\n\"\n                '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n                '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n                \"Format 2 - Context-style mapping:\\n\"\n                '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n                \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n            ),\n        ),\n        # ----- Auth controls (dynamic) -----\n        DropdownInput(\n            name=\"auth_mode\",\n            display_name=\"Authentication Mode\",\n          
  value=\"basic\",\n            options=[\"basic\", \"jwt\"],\n            info=(\n                \"Authentication method: 'basic' for username/password authentication, \"\n                \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n            ),\n            real_time_refresh=True,\n            advanced=False,\n        ),\n        StrInput(\n            name=\"username\",\n            display_name=\"Username\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"password\",\n            display_name=\"OpenSearch Password\",\n            value=\"admin\",\n            show=False,\n        ),\n        SecretStrInput(\n            name=\"jwt_token\",\n            display_name=\"JWT Token\",\n            value=\"JWT\",\n            load_from_db=False,\n            show=True,\n            info=(\n                \"Valid JSON Web Token for authentication. \"\n                \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n            ),\n        ),\n        StrInput(\n            name=\"jwt_header\",\n            display_name=\"JWT Header Name\",\n            value=\"Authorization\",\n            show=False,\n            advanced=True,\n        ),\n        BoolInput(\n            name=\"bearer_prefix\",\n            display_name=\"Prefix 'Bearer '\",\n            value=True,\n            show=False,\n            advanced=True,\n        ),\n        # ----- TLS -----\n        BoolInput(\n            name=\"use_ssl\",\n            display_name=\"Use SSL/TLS\",\n            value=True,\n            advanced=True,\n            info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n        ),\n        BoolInput(\n            name=\"verify_certs\",\n            display_name=\"Verify SSL Certificates\",\n            value=False,\n            advanced=True,\n            info=(\n                \"Verify SSL certificates when connecting. 
\"\n                \"Disable for self-signed certificates in development environments.\"\n            ),\n        ),\n    ]\n\n    # ---------- helper functions for index management ----------\n    def _default_text_mapping(\n        self,\n        dim: int,\n        engine: str = \"jvector\",\n        space_type: str = \"l2\",\n        ef_search: int = 512,\n        ef_construction: int = 100,\n        m: int = 16,\n        vector_field: str = \"vector_field\",\n    ) -> dict[str, Any]:\n        \"\"\"Create the default OpenSearch index mapping for vector search.\n\n        This method generates the index configuration with k-NN settings optimized\n        for approximate nearest neighbor search using the specified vector engine.\n\n        Args:\n            dim: Dimensionality of the vector embeddings\n            engine: Vector search engine (jvector, nmslib, faiss, lucene)\n            space_type: Distance metric for similarity calculation\n            ef_search: Size of dynamic list used during search\n            ef_construction: Size of dynamic list used during index construction\n            m: Number of bidirectional links for each vector\n            vector_field: Name of the field storing vector embeddings\n\n        Returns:\n            Dictionary containing OpenSearch index mapping configuration\n        \"\"\"\n        return {\n            \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n            \"mappings\": {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                
    },\n                    \"embedding_dimensions\": {\n                        \"type\": \"integer\"\n                    }\n                }\n            },\n        }\n\n    def _ensure_vector_field_mapping(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        vector_field: str,\n        dim: int,\n        engine: str,\n        space_type: str,\n        ef_construction: int,\n        m: int,\n    ) -> None:\n        \"\"\"Ensure the target vector field exists with the correct mapping.\"\"\"\n        try:\n            mapping = {\n                \"properties\": {\n                    vector_field: {\n                        \"type\": \"knn_vector\",\n                        \"dimension\": dim,\n                        \"method\": {\n                            \"name\": \"disk_ann\",\n                            \"space_type\": space_type,\n                            \"engine\": engine,\n                            \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n                        },\n                    },\n                    \"embedding_dimensions\": {\n                        \"type\": \"integer\"\n                    }\n                }\n            }\n            client.indices.put_mapping(index=index_name, body=mapping)\n            logger.info(\n                \"Added/updated vector field mapping for %s in index %s\",\n                vector_field,\n                index_name,\n            )\n        except Exception as exc:\n            logger.warning(\n                \"Could not ensure vector field mapping for %s: %s\",\n                vector_field,\n                exc,\n            )\n\n    def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n        \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n        Amazon OpenSearch Serverless has restrictions on which vector engines\n        can be used. 
This method ensures the selected engine is compatible.\n\n        Args:\n            is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n            engine: The selected vector search engine\n\n        Raises:\n            ValueError: If AOSS is used with an incompatible engine\n        \"\"\"\n        if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n            msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n            raise ValueError(msg)\n\n    def _is_aoss_enabled(self, http_auth: Any) -> bool:\n        \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n        Args:\n            http_auth: The HTTP authentication object\n\n        Returns:\n            True if AOSS is enabled, False otherwise\n        \"\"\"\n        return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n    def _bulk_ingest_embeddings(\n        self,\n        client: OpenSearch,\n        index_name: str,\n        embeddings: list[list[float]],\n        texts: list[str],\n        metadatas: list[dict] | None = None,\n        ids: list[str] | None = None,\n        vector_field: str = \"vector_field\",\n        text_field: str = \"text\",\n        mapping: dict | None = None,\n        max_chunk_bytes: int | None = 1 * 1024 * 1024,\n        *,\n        is_aoss: bool = False,\n    ) -> list[str]:\n        \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n        This method uses bulk operations to insert documents with their vector\n        embeddings and metadata into the specified OpenSearch index.\n\n        Args:\n            client: OpenSearch client instance\n            index_name: Target index for document storage\n            embeddings: List of vector embeddings for each document\n            texts: List of document texts\n            metadatas: Optional metadata dictionaries for each document\n            ids: Optional 
document IDs (UUIDs generated if not provided)\n            vector_field: Field name for storing vector embeddings\n            text_field: Field name for storing document text\n            mapping: Optional index mapping configuration\n            max_chunk_bytes: Maximum size per bulk request chunk\n            is_aoss: Whether using Amazon OpenSearch Serverless\n\n        Returns:\n            List of document IDs that were successfully ingested\n        \"\"\"\n        if not mapping:\n            mapping = {}\n\n        requests = []\n        return_ids = []\n        vector_dimensions = len(embeddings[0]) if embeddings else None\n\n        for i, text in enumerate(texts):\n            metadata = metadatas[i] if metadatas else {}\n            if vector_dimensions is not None and \"embedding_dimensions\" not in metadata:\n                metadata = {**metadata, \"embedding_dimensions\": vector_dimensions}\n            _id = ids[i] if ids else str(uuid.uuid4())\n            request = {\n                \"_op_type\": \"index\",\n                \"_index\": index_name,\n                vector_field: embeddings[i],\n                text_field: text,\n                **metadata,\n            }\n            if is_aoss:\n                request[\"id\"] = _id\n            else:\n                request[\"_id\"] = _id\n            requests.append(request)\n            return_ids.append(_id)\n        if metadatas:\n            self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n        helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n        return return_ids\n\n    # ---------- auth / client ----------\n    def _build_auth_kwargs(self) -> dict[str, Any]:\n        \"\"\"Build authentication configuration for OpenSearch client.\n\n        Constructs the appropriate authentication parameters based on the\n        selected auth mode (basic username/password or JWT token).\n\n        Returns:\n            Dictionary containing authentication 
configuration\n\n        Raises:\n            ValueError: If required authentication parameters are missing\n        \"\"\"\n        mode = (self.auth_mode or \"basic\").strip().lower()\n        if mode == \"jwt\":\n            token = (self.jwt_token or \"\").strip()\n            if not token:\n                msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n                raise ValueError(msg)\n            header_name = (self.jwt_header or \"Authorization\").strip()\n            header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n            return {\"headers\": {header_name: header_value}}\n        user = (self.username or \"\").strip()\n        pwd = (self.password or \"\").strip()\n        if not user or not pwd:\n            msg = \"Auth Mode is 'basic' but username/password are missing.\"\n            raise ValueError(msg)\n        return {\"http_auth\": (user, pwd)}\n\n    def build_client(self) -> OpenSearch:\n        \"\"\"Create and configure an OpenSearch client instance.\n\n        Returns:\n            Configured OpenSearch client ready for operations\n        \"\"\"\n        auth_kwargs = self._build_auth_kwargs()\n        return OpenSearch(\n            hosts=[self.opensearch_url],\n            use_ssl=self.use_ssl,\n            verify_certs=self.verify_certs,\n            ssl_assert_hostname=False,\n            ssl_show_warn=False,\n            **auth_kwargs,\n        )\n\n    @check_cached_vector_store\n    def build_vector_store(self) -> OpenSearch:\n        # Return raw OpenSearch client as our “vector store.”\n        self.log(self.ingest_data)\n        client = self.build_client()\n        self._add_documents_to_vector_store(client=client)\n        return client\n\n    # ---------- ingest ----------\n    def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n        \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n        This method handles the complete document ingestion 
pipeline:\n        - Prepares document data and metadata\n        - Generates vector embeddings\n        - Creates appropriate index mappings\n        - Bulk inserts documents with vectors\n\n        Args:\n            client: OpenSearch client for performing operations\n        \"\"\"\n        # Convert DataFrame to Data if needed using parent's method\n        self.ingest_data = self._prepare_ingest_data()\n\n        docs = self.ingest_data or []\n        if not docs:\n            self.log(\"No documents to ingest.\")\n            return\n\n        # Extract texts and metadata from documents\n        texts = []\n        metadatas = []\n        # Process docs_metadata table input into a dict\n        additional_metadata = {}\n        if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n            logger.info(f\"[LF] Docs metadata {self.docs_metadata}\")\n            if isinstance(self.docs_metadata[-1], Data):\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                self.docs_metadata = self.docs_metadata[-1].data\n                logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n                additional_metadata.update(self.docs_metadata)\n            else:\n                for item in self.docs_metadata:\n                    if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n                        additional_metadata[item[\"key\"]] = item[\"value\"]\n        logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n        for doc_obj in docs:\n            data_copy = json.loads(doc_obj.model_dump_json())\n            text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n            texts.append(text)\n\n            # Merge additional metadata from table input\n            data_copy.update(additional_metadata)\n\n            metadatas.append(data_copy)\n        self.log(metadatas)\n        if not self.embedding:\n            msg = 
\"Embedding handle is required to embed documents.\"\n            raise ValueError(msg)\n\n        # Generate embeddings\n        vectors = self.embedding.embed_documents(texts)\n\n        if not vectors:\n            self.log(\"No vectors generated from documents.\")\n            return\n\n        # Get vector dimension for mapping\n        dim = len(vectors[0]) if vectors else 768  # default fallback\n\n        # Check for AOSS\n        auth_kwargs = self._build_auth_kwargs()\n        is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n        # Validate engine with AOSS\n        engine = getattr(self, \"engine\", \"jvector\")\n        self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n        # Create mapping with proper KNN settings\n        space_type = getattr(self, \"space_type\", \"l2\")\n        ef_construction = getattr(self, \"ef_construction\", 512)\n        m = getattr(self, \"m\", 16)\n\n        mapping = self._default_text_mapping(\n            dim=dim,\n            engine=engine,\n            space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n            vector_field=self.vector_field,\n        )\n\n        try:\n            if not client.indices.exists(index=self.index_name):\n                self.log(f\"Creating index '{self.index_name}' with base mapping\")\n                client.indices.create(index=self.index_name, body=mapping)\n        except RequestError as creation_error:\n            if getattr(creation_error, \"error\", \"\") != \"resource_already_exists_exception\":\n                logger.warning(\n                    \"Failed to create index %s: %s\",\n                    self.index_name,\n                    creation_error,\n                )\n\n        self._ensure_vector_field_mapping(\n            client=client,\n            index_name=self.index_name,\n            vector_field=self.vector_field,\n            dim=dim,\n            engine=engine,\n            
space_type=space_type,\n            ef_construction=ef_construction,\n            m=m,\n        )\n\n        self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n        # Use the LangChain-style bulk ingestion\n        return_ids = self._bulk_ingest_embeddings(\n            client=client,\n            index_name=self.index_name,\n            embeddings=vectors,\n            texts=texts,\n            metadatas=metadatas,\n            vector_field=self.vector_field,\n            text_field=\"text\",\n            mapping=mapping,\n            is_aoss=is_aoss,\n        )\n        self.log(metadatas)\n\n        self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n    # ---------- helpers for filters ----------\n    def _is_placeholder_term(self, term_obj: dict) -> bool:\n        # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n        return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n    def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n        \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n        This method accepts two filter formats and converts them to standardized\n        OpenSearch query clauses:\n\n        Format A - Explicit filters:\n        {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n         \"limit\": 10, \"score_threshold\": 1.5}\n\n        Format B - Context-style mapping:\n        {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n        Args:\n            filter_obj: Filter configuration dictionary or None\n\n        Returns:\n            List of OpenSearch filter clauses (term/terms objects)\n            Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n        \"\"\"\n        if not filter_obj:\n            return []\n\n        # If it is a string, try to parse it once\n        if 
isinstance(filter_obj, str):\n            try:\n                filter_obj = json.loads(filter_obj)\n            except json.JSONDecodeError:\n                # Not valid JSON - treat as no filters\n                return []\n\n        # Case A: already an explicit list/dict under \"filter\"\n        if \"filter\" in filter_obj:\n            raw = filter_obj[\"filter\"]\n            if isinstance(raw, dict):\n                raw = [raw]\n            explicit_clauses: list[dict] = []\n            for f in raw or []:\n                if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n                    explicit_clauses.append(f)\n                elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n                    field, vals = next(iter(f[\"terms\"].items()))\n                    if isinstance(vals, list) and len(vals) > 0:\n                        explicit_clauses.append(f)\n            return explicit_clauses\n\n        # Case B: convert context-style maps into clauses\n        field_mapping = {\n            \"data_sources\": \"filename\",\n            \"document_types\": \"mimetype\",\n            \"owners\": \"owner\",\n        }\n        context_clauses: list[dict] = []\n        for k, values in filter_obj.items():\n            if not isinstance(values, list):\n                continue\n            field = field_mapping.get(k, k)\n            if len(values) == 0:\n                # Match-nothing placeholder (kept to mirror your tool semantics)\n                context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n            elif len(values) == 1:\n                if values[0] != \"__IMPOSSIBLE_VALUE__\":\n                    context_clauses.append({\"term\": {field: values[0]}})\n            else:\n                context_clauses.append({\"terms\": {field: values}})\n        return context_clauses\n\n    # ---------- search (single hybrid path matching your tool) ----------\n    def 
search(self, query: str | None = None) -> list[dict[str, Any]]:\n        \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n        This method executes a sophisticated search that combines:\n        - K-nearest neighbor (KNN) vector similarity search (70% weight)\n        - Multi-field keyword search with fuzzy matching (30% weight)\n        - Optional filtering and score thresholds\n        - Aggregations for faceted search results\n\n        Args:\n            query: Search query string (used for both vector embedding and keyword search)\n\n        Returns:\n            List of search results with page_content, metadata, and relevance scores\n\n        Raises:\n            ValueError: If embedding component is not provided or filter JSON is invalid\n        \"\"\"\n        logger.info(self.ingest_data)\n        client = self.build_client()\n        q = (query or \"\").strip()\n\n        # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n        filter_obj = None\n        if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n            try:\n                filter_obj = json.loads(self.filter_expression)\n            except json.JSONDecodeError as e:\n                msg = f\"Invalid filter_expression JSON: {e}\"\n                raise ValueError(msg) from e\n\n        if not self.embedding:\n            msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n            raise ValueError(msg)\n\n        # Embed the query\n        vec = self.embedding.embed_query(q)\n\n        # Build filter clauses (accept both shapes)\n        filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n        # Respect the tool's limit/threshold defaults\n        limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n        score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n        # Build the same hybrid body as your SearchService\n 
       body = {\n            \"query\": {\n                \"bool\": {\n                    \"should\": [\n                        {\n                            \"knn\": {\n                                self.vector_field: {\n                                    \"vector\": vec,\n                                    \"k\": 10,  # fixed to match the tool\n                                    \"boost\": 0.7,\n                                }\n                            }\n                        },\n                        {\n                            \"multi_match\": {\n                                \"query\": q,\n                                \"fields\": [\"text^2\", \"filename^1.5\"],\n                                \"type\": \"best_fields\",\n                                \"fuzziness\": \"AUTO\",\n                                \"boost\": 0.3,\n                            }\n                        },\n                    ],\n                    \"minimum_should_match\": 1,\n                }\n            },\n            \"aggs\": {\n                \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n                \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n                \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n            },\n            \"_source\": [\n                \"filename\",\n                \"mimetype\",\n                \"page\",\n                \"text\",\n                \"source_url\",\n                \"owner\",\n                \"embedding_dimensions\",\n                \"allowed_users\",\n                \"allowed_groups\",\n            ],\n            \"size\": limit,\n        }\n        if filter_clauses:\n            body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n        if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n            # top-level min_score (matches your tool)\n            body[\"min_score\"] = 
score_threshold\n\n        resp = client.search(index=self.index_name, body=body)\n        hits = resp.get(\"hits\", {}).get(\"hits\", [])\n        return [\n            {\n                \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n                \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n                \"score\": hit.get(\"_score\"),\n            }\n            for hit in hits\n        ]\n\n    def search_documents(self) -> list[Data]:\n        \"\"\"Search documents and return results as Data objects.\n\n        This is the main interface method that performs the search using the\n        configured search_query and returns results in Langflow's Data format.\n\n        Returns:\n            List of Data objects containing search results with text and metadata\n\n        Raises:\n            Exception: If search operation fails\n        \"\"\"\n        try:\n            raw = self.search(self.search_query or \"\")\n            return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n            self.log(self.ingest_data)\n        except Exception as e:\n            self.log(f\"search_documents error: {e}\")\n            raise\n\n    # -------- dynamic UI handling (auth switch) --------\n    async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n        \"\"\"Dynamically update component configuration based on field changes.\n\n        This method handles real-time UI updates, particularly for authentication\n        mode changes that show/hide relevant input fields.\n\n        Args:\n            build_config: Current component configuration\n            field_value: New value for the changed field\n            field_name: Name of the field that changed\n\n        Returns:\n            Updated build configuration with appropriate field visibility\n        \"\"\"\n        try:\n            if field_name == \"auth_mode\":\n               
 mode = (field_value or \"basic\").strip().lower()\n                is_basic = mode == \"basic\"\n                is_jwt = mode == \"jwt\"\n\n                build_config[\"username\"][\"show\"] = is_basic\n                build_config[\"password\"][\"show\"] = is_basic\n\n                build_config[\"jwt_token\"][\"show\"] = is_jwt\n                build_config[\"jwt_header\"][\"show\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n                build_config[\"username\"][\"required\"] = is_basic\n                build_config[\"password\"][\"required\"] = is_basic\n\n                build_config[\"jwt_token\"][\"required\"] = is_jwt\n                build_config[\"jwt_header\"][\"required\"] = is_jwt\n                build_config[\"bearer_prefix\"][\"required\"] = False\n\n                if is_basic:\n                    build_config[\"jwt_token\"][\"value\"] = \"\"\n\n                return build_config\n\n        except (KeyError, ValueError) as e:\n            self.log(f\"update_build_config error: {e}\")\n\n        return build_config\n"
               },
               "docs_metadata": {
                 "_input_type": "TableInput",
@@ -3618,4 +3618,4 @@
     "rag",
     "q-a"
   ]
-}
\ No newline at end of file
+}
diff --git a/scripts/extract_flow_component.py b/scripts/extract_flow_component.py
new file mode 100644
index 00000000..8312f9f2
--- /dev/null
+++ b/scripts/extract_flow_component.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""
+Extract embedded component code from a Langflow JSON flow.
+
+Example:
+    python scripts/extract_flow_component.py \\
+        --flow-file flows/ingestion_flow.json \\
+        --display-name "OpenSearch (Multi-Model)" \\
+        --output flows/components/opensearch_multimodel.py
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+from typing import Optional
+
+
+def should_select_component(
+    node: dict,
+    *,
+    display_name: Optional[str],
+    metadata_module: Optional[str],
+) -> bool:
+    """Tell whether ``node`` passes both filters and embeds a code value.
+
+    A falsy ``display_name`` or ``metadata_module`` disables that filter.
+    """
+    spec = node.get("data", {}).get("node", {})
+
+    name_mismatch = bool(display_name) and spec.get("display_name") != display_name
+    if name_mismatch:
+        return False
+
+    if metadata_module and spec.get("metadata", {}).get("module") != metadata_module:
+        return False
+
+    code_slot = spec.get("template", {}).get("code")
+    return isinstance(code_slot, dict) and "value" in code_slot
+
+
+def extract_code_from_flow(
+    flow_path: Path,
+    *,
+    display_name: Optional[str],
+    metadata_module: Optional[str],
+    match_index: int,
+) -> str:
+    """Return the ``template.code.value`` string of one matching flow node.
+
+    The flow JSON at ``flow_path`` is parsed, the nodes under ``data.nodes``
+    are filtered with ``should_select_component``, and the match at position
+    ``match_index`` supplies the code.
+
+    Raises:
+        SystemExit: If the file is not valid JSON, if no node matches, or if
+            ``match_index`` does not address one of the matches.
+    """
+    try:
+        parsed = json.loads(flow_path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as exc:
+        raise SystemExit(f"[error] failed to parse {flow_path}: {exc}") from exc
+
+    candidates = [
+        entry
+        for entry in parsed.get("data", {}).get("nodes", [])
+        if should_select_component(entry, display_name=display_name, metadata_module=metadata_module)
+    ]
+    if not candidates:
+        raise SystemExit(f"[error] no component found matching the supplied filters in {flow_path}")
+
+    # Negative indices are rejected as well, so Python's wrap-around never applies.
+    if not 0 <= match_index < len(candidates):
+        raise SystemExit(f"[error] match index {match_index} out of range (found {len(candidates)} matches)")
+
+    chosen = candidates[match_index]
+    return chosen["data"]["node"]["template"]["code"]["value"]
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command-line options, prompting for a component if none is named.
+
+    When neither ``--display-name`` nor ``--metadata-module`` is supplied, the
+    flow file is read, the display names of its nodes are listed, and the user
+    picks one by index; the choice is stored in ``args.display_name``.
+
+    Returns:
+        The parsed argument namespace.
+
+    Raises:
+        SystemExit: On invalid arguments, a missing or unparsable flow file,
+            a flow without named components, or input ending before a choice.
+    """
+    parser = argparse.ArgumentParser(
+        description="Extract component code from a Langflow JSON flow."
+    )
+    parser.add_argument("--flow-file", required=True, type=Path, help="Path to the flow JSON file.")
+    parser.add_argument(
+        "--display-name",
+        help="Component display_name to match (e.g. 'OpenSearch (Multi-Model)').",
+    )
+    parser.add_argument("--metadata-module", help="Component metadata.module value to match.")
+    parser.add_argument(
+        "--match-index",
+        type=int,
+        default=0,
+        help="Index of the matched component when multiple exist (default: 0).",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        help="Destination file for the extracted code (stdout if omitted).",
+    )
+
+    args = parser.parse_args()
+    if args.display_name or args.metadata_module:
+        return args
+
+    # No filter given: offer an interactive selection of component display names.
+    if not args.flow_file.exists():
+        parser.error(f"Flow file not found: {args.flow_file}")
+
+    try:
+        flow_data = json.loads(args.flow_file.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as exc:
+        raise SystemExit(f"[error] failed to parse {args.flow_file}: {exc}") from exc
+
+    # Nodes without a display_name are left out: a placeholder entry could be
+    # chosen but would never match a real component afterwards.
+    nodes = flow_data.get("data", {}).get("nodes", [])
+    found = (n.get("data", {}).get("node", {}).get("display_name") for n in nodes)
+    display_names = sorted({name for name in found if name})
+    if not display_names:
+        parser.error("Unable to locate any components in the flow; supply --metadata-module instead.")
+
+    print("Select a component display name:")
+    for idx, name in enumerate(display_names):
+        print(f"  [{idx}] {name}")
+
+    while True:
+        try:
+            choice = input(f"Enter choice (0-{len(display_names)-1}): ").strip() or "0"
+        except EOFError:
+            parser.error("No component selected (input ended).")
+        # isdecimal(), unlike isdigit(), rejects characters such as superscript
+        # digits that int() cannot convert.
+        if choice.isdecimal() and int(choice) < len(display_names):
+            args.display_name = display_names[int(choice)]
+            return args
+        print("Invalid selection, please try again.")
+
+
+def main() -> None:
+    """Write the selected component's code to ``--output``, or print it.
+
+    Exits with an error message when the flow file does not exist.
+    """
+    opts = parse_args()
+    flow_file = opts.flow_file
+    if not flow_file.exists():
+        raise SystemExit(f"[error] flow file not found: {flow_file}")
+
+    selection = {"display_name": opts.display_name, "metadata_module": opts.metadata_module}
+    source = extract_code_from_flow(flow_file, match_index=opts.match_index, **selection)
+    destination = opts.output
+    if not destination:
+        print(source)
+        return
+    destination.write_text(source, encoding="utf-8")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/update_flow_components.py b/scripts/update_flow_components.py
new file mode 100644
index 00000000..8aaafd3a
--- /dev/null
+++ b/scripts/update_flow_components.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+"""
+Utility to sync embedded component code inside Langflow JSON files.
+
+Given a Python source file (e.g. the OpenSearch component implementation) and
+a target selector, this script updates every flow definition in ``./flows`` so
+that the component's ``template.code.value`` matches the supplied file.
+
+Example:
+    python scripts/update_flow_components.py \\
+        --code-file flows/components/opensearch_multimodel.py \\
+        --display-name "OpenSearch (Multi-Model)"
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+from typing import Iterable
+
+
+def load_code(source_path: Path) -> str:  # returns the file's entire text, decoded as UTF-8
+    try:
+        return source_path.read_text(encoding="utf-8")
+    except FileNotFoundError as exc:  # only a missing file is translated; other read errors propagate
+        raise SystemExit(f"[error] code file not found: {source_path}") from exc
+
+
+def should_update_component(node: dict, *, display_name: str | None, metadata_module: str | None) -> bool:
+    """Decide whether ``node`` is a component whose embedded code should be synced.
+
+    Each filter that is set (truthy) must equal the node's ``display_name`` /
+    ``metadata.module``; the node must also hold a ``template.code`` dict
+    containing a ``value`` key.
+    """
+    details = node.get("data", {}).get("node", {})
+
+    if display_name and details.get("display_name") != display_name:
+        return False
+    if metadata_module and details.get("metadata", {}).get("module") != metadata_module:
+        return False
+
+    slot = details.get("template", {}).get("code")
+    return isinstance(slot, dict) and "value" in slot
+
+
+def update_flow(flow_path: Path, code: str, *, display_name: str | None, metadata_module: str | None, dry_run: bool) -> bool:
+    """Replace the embedded code of matching components in one flow file.
+
+    Returns True when at least one matching node holds code different from
+    ``code``. The file is rewritten (2-space indented JSON plus a trailing
+    newline) only when something changed and ``dry_run`` is False.
+
+    Raises:
+        SystemExit: If ``flow_path`` does not contain valid JSON.
+    """
+    with flow_path.open(encoding="utf-8") as handle:
+        try:
+            flow = json.load(handle)
+        except json.JSONDecodeError as exc:
+            raise SystemExit(f"[error] failed to parse {flow_path}: {exc}") from exc
+
+    stale = False
+    for entry in flow.get("data", {}).get("nodes", []):
+        if should_update_component(entry, display_name=display_name, metadata_module=metadata_module):
+            code_slot = entry["data"]["node"]["template"]["code"]
+            if code_slot["value"] != code:
+                stale = True
+                if not dry_run:
+                    code_slot["value"] = code
+
+    if stale and not dry_run:
+        flow_path.write_text(json.dumps(flow, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+    return stale
+
+
+def iter_flow_files(flows_dir: Path) -> Iterable[Path]:
+    """Yield the regular ``*.json`` files directly inside ``flows_dir``, in sorted order."""
+    candidates = sorted(flows_dir.glob("*.json"))
+    yield from (candidate for candidate in candidates if candidate.is_file())
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the CLI options; at least one component selector is mandatory."""
+    cli = argparse.ArgumentParser(description="Update embedded component code in Langflow JSON files.")
+    add = cli.add_argument
+    add("--code-file", required=True, type=Path, help="Path to the Python file containing the component code.")
+    add("--flows-dir", type=Path, default=Path("flows"), help="Directory containing Langflow JSON files.")
+    add("--display-name", help="Component display_name to match (e.g. 'OpenSearch (Multi-Model)').")
+    add("--metadata-module", help="Component metadata.module value to match.")
+    add("--dry-run", action="store_true", help="Report which files would change without modifying them.")
+
+    parsed = cli.parse_args()
+    if not (parsed.display_name or parsed.metadata_module):
+        cli.error("At least one of --display-name or --metadata-module must be provided.")
+    return parsed
+
+
+def main() -> None:
+    """Sync the code file into every flow under ``--flows-dir`` and report.
+
+    Each ``*.json`` flow is passed to ``update_flow``; the paths for which it
+    reports a change are printed afterwards, with ``[dry-run]`` wording when
+    ``--dry-run`` is set.
+
+    Raises:
+        SystemExit: If the flows directory does not exist.
+    """
+    args = parse_args()
+
+    flows_dir: Path = args.flows_dir
+    if not flows_dir.exists():
+        raise SystemExit(f"[error] flows directory not found: {flows_dir}")
+
+    code = load_code(args.code_file)
+    selectors = {"display_name": args.display_name, "metadata_module": args.metadata_module}
+
+    touched = []
+    for candidate in iter_flow_files(flows_dir):
+        if update_flow(candidate, code, dry_run=args.dry_run, **selectors):
+            touched.append(candidate)
+
+    if args.dry_run:
+        heading, nothing = "[dry-run] files that would be updated:", "[dry-run] no files would change."
+    else:
+        heading, nothing = "Updated component code in:", "No updates were necessary."
+
+    if not touched:
+        print(nothing)
+        return
+    print(heading)
+    for path in touched:
+        print(f"  - {path}")
+
+
+if __name__ == "__main__":
+    main()