From 392322077f6b4eb5951a1ee05f1e8a5f7810968a Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Fri, 26 Sep 2025 03:08:26 -0400 Subject: [PATCH] Enable local build for OpenRAG services and update metadata handling Switched OpenRAG backend and frontend in docker-compose.yml to use local Dockerfile builds instead of remote images. Updated environment variables for better clarity and system integration. In flows/openrag_agent.json and langflow_file_service, improved handling of docs_metadata to support Data objects and added logging for metadata ingestion. Added agent_llm edge to agent node in flow definition. --- docker-compose.yml | 27 +-- flows/openrag_agent.json | 296 +++++--------------------- src/main.py | 4 + src/services/langflow_file_service.py | 2 +- 4 files changed, 67 insertions(+), 262 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index d21cbb59..93fcbcdf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -39,10 +39,10 @@ services: - "5601:5601" openrag-backend: - image: phact/openrag-backend:${OPENRAG_VERSION:-latest} - #build: - #context: . - #dockerfile: Dockerfile.backend + # image: phact/openrag-backend:${OPENRAG_VERSION:-latest} + build: + context: . + dockerfile: Dockerfile.backend container_name: openrag-backend depends_on: - langflow @@ -76,10 +76,11 @@ services: gpus: all openrag-frontend: - image: phact/openrag-frontend:${OPENRAG_VERSION:-latest} - #build: - #context: . - #dockerfile: Dockerfile.frontend + # image: phact/openrag-frontend:${OPENRAG_VERSION:-latest} + build: + context: . + dockerfile: Dockerfile.frontend + #dockerfile: Dockerfile.frontend container_name: openrag-frontend depends_on: - openrag-backend @@ -102,11 +103,11 @@ services: - OPENAI_API_KEY=${OPENAI_API_KEY} - LANGFLOW_LOAD_FLOWS_PATH=/app/flows - LANGFLOW_SECRET_KEY=${LANGFLOW_SECRET_KEY} - - JWT="dummy" - - OWNER="dummy" - - OWNER_NAME="dummy" - - OWNER_EMAIL="dummy" - - CONNECTOR_TYPE="dummy" + - JWT=None + - OWNER=None + - OWNER_NAME=None + - OWNER_EMAIL=None + - CONNECTOR_TYPE=system - OPENRAG-QUERY-FILTER="{}" - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD} - LANGFLOW_VARIABLES_TO_GET_FROM_ENVIRONMENT=JWT,OPENRAG-QUERY-FILTER,OPENSEARCH_PASSWORD,OWNER,OWNER_NAME,OWNER_EMAIL,CONNECTOR_TYPE diff --git a/flows/openrag_agent.json b/flows/openrag_agent.json index 8a76a475..0b2f5859 100644 --- a/flows/openrag_agent.json +++ b/flows/openrag_agent.json @@ -114,6 +114,33 @@ "sourceHandle": "{œdataTypeœ:œOpenSearchVectorStoreComponentœ,œidœ:œOpenSearch-iYfjfœ,œnameœ:œcomponent_as_toolœ,œoutput_typesœ:[œToolœ]}", "target": "Agent-crjWf", "targetHandle": "{œfieldNameœ:œtoolsœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œToolœ],œtypeœ:œotherœ}" + }, + { + "animated": false, + "data": { + "sourceHandle": { + "dataType": "LanguageModelComponent", + "id": "LanguageModelComponent-0YME7", + "name": "model_output", + "output_types": [ + "LanguageModel" + ] + }, + "targetHandle": { + "fieldName": "agent_llm", + "id": "Agent-crjWf", + "inputTypes": [ + "LanguageModel" + ], + "type": "str" + } + }, + "id": "xy-edge__LanguageModelComponent-0YME7{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-0YME7œ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}-Agent-crjWf{œfieldNameœ:œagent_llmœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œstrœ}", + "selected": false, + "source": "LanguageModelComponent-0YME7", + "sourceHandle": "{œdataTypeœ:œLanguageModelComponentœ,œidœ:œLanguageModelComponent-0YME7œ,œnameœ:œmodel_outputœ,œoutput_typesœ:[œLanguageModelœ]}", + "target": 
"Agent-crjWf", + "targetHandle": "{œfieldNameœ:œagent_llmœ,œidœ:œAgent-crjWfœ,œinputTypesœ:[œLanguageModelœ],œtypeœ:œstrœ}" } ], "nodes": [ @@ -674,11 +701,9 @@ ], "frozen": false, "icon": "OpenSearch", - "last_updated": "2025-09-26T05:15:05.779Z", "legacy": false, - "lf_version": "1.6.0", "metadata": { - "code_hash": "b19e82f1314a", + "code_hash": "07eef12db820", "dependencies": { "dependencies": [ { @@ -779,14 +804,17 @@ "show": true, "title_case": false, "type": "code", - "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n This component provides vector storage and retrieval using OpenSearch, combining semantic\n similarity search (KNN) with keyword-based search for optimal results. It supports document\n ingestion, vector embeddings, and advanced filtering with authentication options.\n\n Features:\n - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n - Hybrid search combining KNN vector similarity and keyword matching\n - Flexible authentication (Basic auth, JWT tokens)\n - Advanced filtering and aggregations\n - Metadata injection during document ingestion\n \"\"\"\n\n display_name: str = \"OpenSearch\"\n icon: str = \"OpenSearch\"\n description: str = (\n \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n )\n\n # Keys we consider baseline\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[i.name for i in LCVectorStoreComponent.inputs], # search_query, add_documents, etc.\n \"embedding\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"filter_expression\",\n \"engine\",\n \"space_type\",\n \"ef_construction\",\n \"m\",\n \"docs_metadata\",\n ]\n\n inputs = [\n TableInput(\n name=\"docs_metadata\",\n display_name=\"Document Metadata\",\n info=(\n \"Additional metadata key-value pairs to be added to all ingested documents. \"\n \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n ),\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Key name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Value of the metadata\",\n },\n ],\n value=[],\n advanced=True,\n ),\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=(\n \"The connection URL for your OpenSearch cluster \"\n \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n ),\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=(\n \"The OpenSearch index name where documents will be stored and searched. 
\"\n \"Will be created automatically if it doesn't exist.\"\n ),\n ),\n DropdownInput(\n name=\"engine\",\n display_name=\"Vector Engine\",\n options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n value=\"jvector\",\n info=(\n \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n ),\n advanced=True,\n ),\n DropdownInput(\n name=\"space_type\",\n display_name=\"Distance Metric\",\n options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n value=\"l2\",\n info=(\n \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"ef_construction\",\n display_name=\"EF Construction\",\n value=512,\n info=(\n \"Size of the dynamic candidate list during index construction. \"\n \"Higher values improve recall but increase indexing time and memory usage.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"m\",\n display_name=\"M Parameter\",\n value=16,\n info=(\n \"Number of bidirectional connections for each vector in the HNSW graph. \"\n \"Higher values improve search quality but increase memory usage and indexing time.\"\n ),\n advanced=True,\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field Name\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Default Result Limit\",\n value=10,\n advanced=True,\n info=(\n \"Default maximum number of search results to return when no limit is \"\n \"specified in the filter expression.\"\n ),\n ),\n MultilineInput(\n name=\"filter_expression\",\n display_name=\"Search Filters (JSON)\",\n value=\"\",\n info=(\n \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n \"Format 1 - Explicit filters:\\n\"\n '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n \"Format 2 - Context-style mapping:\\n\"\n '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n ),\n ),\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Authentication Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=(\n \"Authentication method: 'basic' for username/password authentication, \"\n \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n ),\n real_time_refresh=True,\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"OpenSearch Password\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"JWT\",\n load_from_db=False,\n show=True,\n info=(\n \"Valid JSON Web Token for authentication. 
\"\n \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n ),\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False,\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False,\n advanced=True,\n ),\n # ----- TLS -----\n BoolInput(\n name=\"use_ssl\",\n display_name=\"Use SSL/TLS\",\n value=True,\n advanced=True,\n info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n ),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify SSL Certificates\",\n value=False,\n advanced=True,\n info=(\n \"Verify SSL certificates when connecting. \"\n \"Disable for self-signed certificates in development environments.\"\n ),\n ),\n ]\n\n # ---------- helper functions for index management ----------\n def _default_text_mapping(\n self,\n dim: int,\n engine: str = \"jvector\",\n space_type: str = \"l2\",\n ef_search: int = 512,\n ef_construction: int = 100,\n m: int = 16,\n vector_field: str = \"vector_field\",\n ) -> dict[str, Any]:\n \"\"\"Create the default OpenSearch index mapping for vector search.\n\n This method generates the index configuration with k-NN settings optimized\n for approximate nearest neighbor search using the specified vector engine.\n\n Args:\n dim: Dimensionality of the vector embeddings\n engine: Vector search engine (jvector, nmslib, faiss, lucene)\n space_type: Distance metric for similarity calculation\n ef_search: Size of dynamic list used during search\n ef_construction: Size of dynamic list used during index construction\n m: Number of bidirectional links for each vector\n vector_field: Name of the field storing vector embeddings\n\n Returns:\n Dictionary containing OpenSearch index mapping configuration\n \"\"\"\n return {\n \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n \"mappings\": {\n \"properties\": {\n vector_field: {\n \"type\": \"knn_vector\",\n \"dimension\": dim,\n \"method\": {\n \"name\": \"disk_ann\",\n \"space_type\": space_type,\n \"engine\": engine,\n \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n },\n }\n }\n },\n }\n\n def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n Amazon OpenSearch Serverless has restrictions on which vector engines\n can be used. 
This method ensures the selected engine is compatible.\n\n Args:\n is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n engine: The selected vector search engine\n\n Raises:\n ValueError: If AOSS is used with an incompatible engine\n \"\"\"\n if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n raise ValueError(msg)\n\n def _is_aoss_enabled(self, http_auth: Any) -> bool:\n \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n Args:\n http_auth: The HTTP authentication object\n\n Returns:\n True if AOSS is enabled, False otherwise\n \"\"\"\n return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n def _bulk_ingest_embeddings(\n self,\n client: OpenSearch,\n index_name: str,\n embeddings: list[list[float]],\n texts: list[str],\n metadatas: list[dict] | None = None,\n ids: list[str] | None = None,\n vector_field: str = \"vector_field\",\n text_field: str = \"text\",\n mapping: dict | None = None,\n max_chunk_bytes: int | None = 1 * 1024 * 1024,\n *,\n is_aoss: bool = False,\n ) -> list[str]:\n \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n This method uses bulk operations to insert documents with their vector\n embeddings and metadata into the specified OpenSearch index.\n\n Args:\n client: OpenSearch client instance\n index_name: Target index for document storage\n embeddings: List of vector embeddings for each document\n texts: List of document texts\n metadatas: Optional metadata dictionaries for each document\n ids: Optional document IDs (UUIDs generated if not provided)\n vector_field: Field name for storing vector embeddings\n text_field: Field name for storing document text\n mapping: Optional index mapping configuration\n max_chunk_bytes: Maximum size per bulk request chunk\n is_aoss: Whether using Amazon OpenSearch Serverless\n\n Returns:\n List of document IDs that were successfully ingested\n \"\"\"\n if not mapping:\n mapping = {}\n\n requests = []\n return_ids = []\n\n for i, text in enumerate(texts):\n metadata = metadatas[i] if metadatas else {}\n _id = ids[i] if ids else str(uuid.uuid4())\n request = {\n \"_op_type\": \"index\",\n \"_index\": index_name,\n vector_field: embeddings[i],\n text_field: text,\n **metadata,\n }\n if is_aoss:\n request[\"id\"] = _id\n else:\n request[\"_id\"] = _id\n requests.append(request)\n return_ids.append(_id)\n if metadatas:\n self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n return return_ids\n\n # ---------- auth / client ----------\n def _build_auth_kwargs(self) -> dict[str, Any]:\n \"\"\"Build authentication configuration for OpenSearch client.\n\n Constructs the appropriate authentication parameters based on the\n selected auth mode (basic username/password or JWT token).\n\n Returns:\n Dictionary containing authentication configuration\n\n Raises:\n ValueError: If required authentication parameters are missing\n \"\"\"\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n raise ValueError(msg)\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n user = (self.username or 
\"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n msg = \"Auth Mode is 'basic' but username/password are missing.\"\n raise ValueError(msg)\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n \"\"\"Create and configure an OpenSearch client instance.\n\n Returns:\n Configured OpenSearch client ready for operations\n \"\"\"\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n ssl_show_warn=False,\n **auth_kwargs,\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # Return raw OpenSearch client as our “vector store.”\n self.log(self.ingest_data)\n client = self.build_client()\n self._add_documents_to_vector_store(client=client)\n return client\n\n # ---------- ingest ----------\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n This method handles the complete document ingestion pipeline:\n - Prepares document data and metadata\n - Generates vector embeddings\n - Creates appropriate index mappings\n - Bulk inserts documents with vectors\n\n Args:\n client: OpenSearch client for performing operations\n \"\"\"\n # Convert DataFrame to Data if needed using parent's method\n self.ingest_data = self._prepare_ingest_data()\n\n docs = self.ingest_data or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n # Extract texts and metadata from documents\n texts = []\n metadatas = []\n # Process docs_metadata table input into a dict\n additional_metadata = {}\n if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n for item in self.docs_metadata:\n if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n additional_metadata[item[\"key\"]] = item[\"value\"]\n\n for doc_obj in docs:\n data_copy = json.loads(doc_obj.model_dump_json())\n text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n texts.append(text)\n\n # Merge additional metadata from table input\n data_copy.update(additional_metadata)\n\n metadatas.append(data_copy)\n self.log(metadatas)\n if not self.embedding:\n msg = \"Embedding handle is required to embed documents.\"\n raise ValueError(msg)\n\n # Generate embeddings\n vectors = self.embedding.embed_documents(texts)\n\n if not vectors:\n self.log(\"No vectors generated from documents.\")\n return\n\n # Get vector dimension for mapping\n dim = len(vectors[0]) if vectors else 768 # default fallback\n\n # Check for AOSS\n auth_kwargs = self._build_auth_kwargs()\n is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n # Validate engine with AOSS\n engine = getattr(self, \"engine\", \"jvector\")\n self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n # Create mapping with proper KNN settings\n space_type = getattr(self, \"space_type\", \"l2\")\n ef_construction = getattr(self, \"ef_construction\", 512)\n m = getattr(self, \"m\", 16)\n\n mapping = self._default_text_mapping(\n dim=dim,\n engine=engine,\n space_type=space_type,\n ef_construction=ef_construction,\n m=m,\n vector_field=self.vector_field,\n )\n\n self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n # Use the LangChain-style bulk ingestion\n return_ids = self._bulk_ingest_embeddings(\n client=client,\n index_name=self.index_name,\n embeddings=vectors,\n texts=texts,\n metadatas=metadatas,\n 
vector_field=self.vector_field,\n text_field=\"text\",\n mapping=mapping,\n is_aoss=is_aoss,\n )\n self.log(metadatas)\n\n self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n # ---------- helpers for filters ----------\n def _is_placeholder_term(self, term_obj: dict) -> bool:\n # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n This method accepts two filter formats and converts them to standardized\n OpenSearch query clauses:\n\n Format A - Explicit filters:\n {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n \"limit\": 10, \"score_threshold\": 1.5}\n\n Format B - Context-style mapping:\n {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n Args:\n filter_obj: Filter configuration dictionary or None\n\n Returns:\n List of OpenSearch filter clauses (term/terms objects)\n Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n \"\"\"\n if not filter_obj:\n return []\n\n # If it is a string, try to parse it once\n if isinstance(filter_obj, str):\n try:\n filter_obj = json.loads(filter_obj)\n except json.JSONDecodeError:\n # Not valid JSON - treat as no filters\n return []\n\n # Case A: already an explicit list/dict under \"filter\"\n if \"filter\" in filter_obj:\n raw = filter_obj[\"filter\"]\n if isinstance(raw, dict):\n raw = [raw]\n explicit_clauses: list[dict] = []\n for f in raw or []:\n if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n explicit_clauses.append(f)\n elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n field, vals = next(iter(f[\"terms\"].items()))\n if isinstance(vals, list) and len(vals) > 0:\n explicit_clauses.append(f)\n return explicit_clauses\n\n # Case B: convert context-style maps into clauses\n field_mapping = {\n \"data_sources\": \"filename\",\n \"document_types\": \"mimetype\",\n \"owners\": \"owner\",\n }\n context_clauses: list[dict] = []\n for k, values in filter_obj.items():\n if not isinstance(values, list):\n continue\n field = field_mapping.get(k, k)\n if len(values) == 0:\n # Match-nothing placeholder (kept to mirror your tool semantics)\n context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n elif len(values) == 1:\n if values[0] != \"__IMPOSSIBLE_VALUE__\":\n context_clauses.append({\"term\": {field: values[0]}})\n else:\n context_clauses.append({\"terms\": {field: values}})\n return context_clauses\n\n # ---------- search (single hybrid path matching your tool) ----------\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n This method executes a sophisticated search that combines:\n - K-nearest neighbor (KNN) vector similarity search (70% weight)\n - Multi-field keyword search with fuzzy matching (30% weight)\n - Optional filtering and score thresholds\n - Aggregations for faceted search results\n\n Args:\n query: Search query string (used for both vector embedding and keyword search)\n\n Returns:\n List of search results with page_content, metadata, and relevance scores\n\n Raises:\n ValueError: If embedding component is not provided or filter JSON is invalid\n \"\"\"\n logger.info(self.ingest_data)\n client = 
self.build_client()\n q = (query or \"\").strip()\n\n # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n filter_obj = None\n if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n try:\n filter_obj = json.loads(self.filter_expression)\n except json.JSONDecodeError as e:\n msg = f\"Invalid filter_expression JSON: {e}\"\n raise ValueError(msg) from e\n\n if not self.embedding:\n msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n raise ValueError(msg)\n\n # Embed the query\n vec = self.embedding.embed_query(q)\n\n # Build filter clauses (accept both shapes)\n filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n # Respect the tool's limit/threshold defaults\n limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n # Build the same hybrid body as your SearchService\n body = {\n \"query\": {\n \"bool\": {\n \"should\": [\n {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": 10, # fixed to match the tool\n \"boost\": 0.7,\n }\n }\n },\n {\n \"multi_match\": {\n \"query\": q,\n \"fields\": [\"text^2\", \"filename^1.5\"],\n \"type\": \"best_fields\",\n \"fuzziness\": \"AUTO\",\n \"boost\": 0.3,\n }\n },\n ],\n \"minimum_should_match\": 1,\n }\n },\n \"aggs\": {\n \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n },\n \"_source\": [\n \"filename\",\n \"mimetype\",\n \"page\",\n \"text\",\n \"source_url\",\n \"owner\",\n \"allowed_users\",\n \"allowed_groups\",\n ],\n \"size\": limit,\n }\n if filter_clauses:\n body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n # top-level min_score (matches your tool)\n body[\"min_score\"] = score_threshold\n\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n def search_documents(self) -> list[Data]:\n \"\"\"Search documents and return results as Data objects.\n\n This is the main interface method that performs the search using the\n configured search_query and returns results in Langflow's Data format.\n\n Returns:\n List of Data objects containing search results with text and metadata\n\n Raises:\n Exception: If search operation fails\n \"\"\"\n try:\n raw = self.search(self.search_query or \"\")\n return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n self.log(self.ingest_data)\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling (auth switch) --------\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Dynamically update component configuration based on field changes.\n\n This method handles real-time UI updates, particularly for authentication\n mode changes that show/hide relevant input fields.\n\n Args:\n build_config: Current component configuration\n field_value: New value for the changed field\n field_name: Name of the field that changed\n\n Returns:\n Updated build configuration with appropriate field 
visibility\n \"\"\"\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n if is_basic:\n build_config[\"jwt_token\"][\"value\"] = \"\"\n\n return build_config\n\n except (KeyError, ValueError) as e:\n self.log(f\"update_build_config error: {e}\")\n\n return build_config\n" + "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n This component provides vector storage and retrieval using OpenSearch, combining semantic\n similarity search (KNN) with keyword-based search for optimal results. It supports document\n ingestion, vector embeddings, and advanced filtering with authentication options.\n\n Features:\n - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n - Hybrid search combining KNN vector similarity and keyword matching\n - Flexible authentication (Basic auth, JWT tokens)\n - Advanced filtering and aggregations\n - Metadata injection during document ingestion\n \"\"\"\n\n display_name: str = \"OpenSearch\"\n icon: str = \"OpenSearch\"\n description: str = (\n \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n )\n\n # Keys we consider baseline\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[i.name for i in LCVectorStoreComponent.inputs], # search_query, add_documents, etc.\n \"embedding\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"filter_expression\",\n \"engine\",\n \"space_type\",\n \"ef_construction\",\n \"m\",\n \"docs_metadata\",\n ]\n\n inputs = [\n TableInput(\n name=\"docs_metadata\",\n display_name=\"Document Metadata\",\n info=(\n \"Additional metadata key-value pairs to be added to all ingested documents. 
\"\n \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n ),\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Key name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Value of the metadata\",\n },\n ],\n value=[],\n # advanced=True,\n input_types=[\"Data\"]\n ),\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=(\n \"The connection URL for your OpenSearch cluster \"\n \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n ),\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=(\n \"The OpenSearch index name where documents will be stored and searched. \"\n \"Will be created automatically if it doesn't exist.\"\n ),\n ),\n DropdownInput(\n name=\"engine\",\n display_name=\"Vector Engine\",\n options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n value=\"jvector\",\n info=(\n \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n ),\n advanced=True,\n ),\n DropdownInput(\n name=\"space_type\",\n display_name=\"Distance Metric\",\n options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n value=\"l2\",\n info=(\n \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"ef_construction\",\n display_name=\"EF Construction\",\n value=512,\n info=(\n \"Size of the dynamic candidate list during index construction. \"\n \"Higher values improve recall but increase indexing time and memory usage.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"m\",\n display_name=\"M Parameter\",\n value=16,\n info=(\n \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n \"Higher values improve search quality but increase memory usage and indexing time.\"\n ),\n advanced=True,\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field Name\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Default Result Limit\",\n value=10,\n advanced=True,\n info=(\n \"Default maximum number of search results to return when no limit is \"\n \"specified in the filter expression.\"\n ),\n ),\n MultilineInput(\n name=\"filter_expression\",\n display_name=\"Search Filters (JSON)\",\n value=\"\",\n info=(\n \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n \"Format 1 - Explicit filters:\\n\"\n '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n \"Format 2 - Context-style mapping:\\n\"\n '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n ),\n ),\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Authentication Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=(\n \"Authentication method: 'basic' for username/password authentication, \"\n \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n ),\n real_time_refresh=True,\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"OpenSearch Password\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"JWT\",\n load_from_db=False,\n show=True,\n info=(\n \"Valid JSON Web Token for authentication. \"\n \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n ),\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False,\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False,\n advanced=True,\n ),\n # ----- TLS -----\n BoolInput(\n name=\"use_ssl\",\n display_name=\"Use SSL/TLS\",\n value=True,\n advanced=True,\n info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n ),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify SSL Certificates\",\n value=False,\n advanced=True,\n info=(\n \"Verify SSL certificates when connecting. 
\"\n \"Disable for self-signed certificates in development environments.\"\n ),\n ),\n ]\n\n # ---------- helper functions for index management ----------\n def _default_text_mapping(\n self,\n dim: int,\n engine: str = \"jvector\",\n space_type: str = \"l2\",\n ef_search: int = 512,\n ef_construction: int = 100,\n m: int = 16,\n vector_field: str = \"vector_field\",\n ) -> dict[str, Any]:\n \"\"\"Create the default OpenSearch index mapping for vector search.\n\n This method generates the index configuration with k-NN settings optimized\n for approximate nearest neighbor search using the specified vector engine.\n\n Args:\n dim: Dimensionality of the vector embeddings\n engine: Vector search engine (jvector, nmslib, faiss, lucene)\n space_type: Distance metric for similarity calculation\n ef_search: Size of dynamic list used during search\n ef_construction: Size of dynamic list used during index construction\n m: Number of bidirectional links for each vector\n vector_field: Name of the field storing vector embeddings\n\n Returns:\n Dictionary containing OpenSearch index mapping configuration\n \"\"\"\n return {\n \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n \"mappings\": {\n \"properties\": {\n vector_field: {\n \"type\": \"knn_vector\",\n \"dimension\": dim,\n \"method\": {\n \"name\": \"disk_ann\",\n \"space_type\": space_type,\n \"engine\": engine,\n \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n },\n }\n }\n },\n }\n\n def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n Amazon OpenSearch Serverless has restrictions on which vector engines\n can be used. This method ensures the selected engine is compatible.\n\n Args:\n is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n engine: The selected vector search engine\n\n Raises:\n ValueError: If AOSS is used with an incompatible engine\n \"\"\"\n if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n raise ValueError(msg)\n\n def _is_aoss_enabled(self, http_auth: Any) -> bool:\n \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n Args:\n http_auth: The HTTP authentication object\n\n Returns:\n True if AOSS is enabled, False otherwise\n \"\"\"\n return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n def _bulk_ingest_embeddings(\n self,\n client: OpenSearch,\n index_name: str,\n embeddings: list[list[float]],\n texts: list[str],\n metadatas: list[dict] | None = None,\n ids: list[str] | None = None,\n vector_field: str = \"vector_field\",\n text_field: str = \"text\",\n mapping: dict | None = None,\n max_chunk_bytes: int | None = 1 * 1024 * 1024,\n *,\n is_aoss: bool = False,\n ) -> list[str]:\n \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n This method uses bulk operations to insert documents with their vector\n embeddings and metadata into the specified OpenSearch index.\n\n Args:\n client: OpenSearch client instance\n index_name: Target index for document storage\n embeddings: List of vector embeddings for each document\n texts: List of document texts\n metadatas: Optional metadata dictionaries for each document\n ids: Optional document IDs (UUIDs generated if not provided)\n vector_field: Field name for storing vector embeddings\n text_field: Field name for 
storing document text\n mapping: Optional index mapping configuration\n max_chunk_bytes: Maximum size per bulk request chunk\n is_aoss: Whether using Amazon OpenSearch Serverless\n\n Returns:\n List of document IDs that were successfully ingested\n \"\"\"\n if not mapping:\n mapping = {}\n\n requests = []\n return_ids = []\n\n for i, text in enumerate(texts):\n metadata = metadatas[i] if metadatas else {}\n _id = ids[i] if ids else str(uuid.uuid4())\n request = {\n \"_op_type\": \"index\",\n \"_index\": index_name,\n vector_field: embeddings[i],\n text_field: text,\n **metadata,\n }\n if is_aoss:\n request[\"id\"] = _id\n else:\n request[\"_id\"] = _id\n requests.append(request)\n return_ids.append(_id)\n if metadatas:\n self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n return return_ids\n\n # ---------- auth / client ----------\n def _build_auth_kwargs(self) -> dict[str, Any]:\n \"\"\"Build authentication configuration for OpenSearch client.\n\n Constructs the appropriate authentication parameters based on the\n selected auth mode (basic username/password or JWT token).\n\n Returns:\n Dictionary containing authentication configuration\n\n Raises:\n ValueError: If required authentication parameters are missing\n \"\"\"\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n raise ValueError(msg)\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n user = (self.username or \"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n msg = \"Auth Mode is 'basic' but username/password are missing.\"\n raise ValueError(msg)\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n \"\"\"Create and configure an OpenSearch client instance.\n\n Returns:\n Configured OpenSearch client ready for operations\n \"\"\"\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n ssl_show_warn=False,\n **auth_kwargs,\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # Return raw OpenSearch client as our “vector store.”\n self.log(self.ingest_data)\n client = self.build_client()\n self._add_documents_to_vector_store(client=client)\n return client\n\n # ---------- ingest ----------\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n This method handles the complete document ingestion pipeline:\n - Prepares document data and metadata\n - Generates vector embeddings\n - Creates appropriate index mappings\n - Bulk inserts documents with vectors\n\n Args:\n client: OpenSearch client for performing operations\n \"\"\"\n # Convert DataFrame to Data if needed using parent's method\n self.ingest_data = self._prepare_ingest_data()\n\n docs = self.ingest_data or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n # Extract texts and metadata from documents\n texts = []\n metadatas = []\n # Process docs_metadata table input into a dict\n additional_metadata = {}\n if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n logger.info(f\"[LF] Docs 
metadata {self.docs_metadata}\")\n if isinstance(self.docs_metadata[-1], Data):\n logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n self.docs_metadata = self.docs_metadata[-1].data\n logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n additional_metadata.update(self.docs_metadata)\n else:\n for item in self.docs_metadata:\n if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n additional_metadata[item[\"key\"]] = item[\"value\"]\n logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n for doc_obj in docs:\n data_copy = json.loads(doc_obj.model_dump_json())\n text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n texts.append(text)\n\n # Merge additional metadata from table input\n data_copy.update(additional_metadata)\n\n metadatas.append(data_copy)\n self.log(metadatas)\n if not self.embedding:\n msg = \"Embedding handle is required to embed documents.\"\n raise ValueError(msg)\n\n # Generate embeddings\n vectors = self.embedding.embed_documents(texts)\n\n if not vectors:\n self.log(\"No vectors generated from documents.\")\n return\n\n # Get vector dimension for mapping\n dim = len(vectors[0]) if vectors else 768 # default fallback\n\n # Check for AOSS\n auth_kwargs = self._build_auth_kwargs()\n is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n # Validate engine with AOSS\n engine = getattr(self, \"engine\", \"jvector\")\n self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n # Create mapping with proper KNN settings\n space_type = getattr(self, \"space_type\", \"l2\")\n ef_construction = getattr(self, \"ef_construction\", 512)\n m = getattr(self, \"m\", 16)\n\n mapping = self._default_text_mapping(\n dim=dim,\n engine=engine,\n space_type=space_type,\n ef_construction=ef_construction,\n m=m,\n vector_field=self.vector_field,\n )\n\n self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n # Use the LangChain-style bulk ingestion\n return_ids = self._bulk_ingest_embeddings(\n client=client,\n index_name=self.index_name,\n embeddings=vectors,\n texts=texts,\n metadatas=metadatas,\n vector_field=self.vector_field,\n text_field=\"text\",\n mapping=mapping,\n is_aoss=is_aoss,\n )\n self.log(metadatas)\n\n self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n # ---------- helpers for filters ----------\n def _is_placeholder_term(self, term_obj: dict) -> bool:\n # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n This method accepts two filter formats and converts them to standardized\n OpenSearch query clauses:\n\n Format A - Explicit filters:\n {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n \"limit\": 10, \"score_threshold\": 1.5}\n\n Format B - Context-style mapping:\n {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n Args:\n filter_obj: Filter configuration dictionary or None\n\n Returns:\n List of OpenSearch filter clauses (term/terms objects)\n Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n \"\"\"\n if not filter_obj:\n return []\n\n # If it is a string, try to parse it once\n if isinstance(filter_obj, str):\n try:\n filter_obj = json.loads(filter_obj)\n except 
json.JSONDecodeError:\n # Not valid JSON - treat as no filters\n return []\n\n # Case A: already an explicit list/dict under \"filter\"\n if \"filter\" in filter_obj:\n raw = filter_obj[\"filter\"]\n if isinstance(raw, dict):\n raw = [raw]\n explicit_clauses: list[dict] = []\n for f in raw or []:\n if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n explicit_clauses.append(f)\n elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n field, vals = next(iter(f[\"terms\"].items()))\n if isinstance(vals, list) and len(vals) > 0:\n explicit_clauses.append(f)\n return explicit_clauses\n\n # Case B: convert context-style maps into clauses\n field_mapping = {\n \"data_sources\": \"filename\",\n \"document_types\": \"mimetype\",\n \"owners\": \"owner\",\n }\n context_clauses: list[dict] = []\n for k, values in filter_obj.items():\n if not isinstance(values, list):\n continue\n field = field_mapping.get(k, k)\n if len(values) == 0:\n # Match-nothing placeholder (kept to mirror your tool semantics)\n context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n elif len(values) == 1:\n if values[0] != \"__IMPOSSIBLE_VALUE__\":\n context_clauses.append({\"term\": {field: values[0]}})\n else:\n context_clauses.append({\"terms\": {field: values}})\n return context_clauses\n\n # ---------- search (single hybrid path matching your tool) ----------\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n This method executes a sophisticated search that combines:\n - K-nearest neighbor (KNN) vector similarity search (70% weight)\n - Multi-field keyword search with fuzzy matching (30% weight)\n - Optional filtering and score thresholds\n - Aggregations for faceted search results\n\n Args:\n query: Search query string (used for both vector embedding and keyword search)\n\n Returns:\n List of search results with page_content, metadata, and relevance scores\n\n Raises:\n ValueError: If embedding component is not provided or filter JSON is invalid\n \"\"\"\n logger.info(self.ingest_data)\n client = self.build_client()\n q = (query or \"\").strip()\n\n # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n filter_obj = None\n if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n try:\n filter_obj = json.loads(self.filter_expression)\n except json.JSONDecodeError as e:\n msg = f\"Invalid filter_expression JSON: {e}\"\n raise ValueError(msg) from e\n\n if not self.embedding:\n msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n raise ValueError(msg)\n\n # Embed the query\n vec = self.embedding.embed_query(q)\n\n # Build filter clauses (accept both shapes)\n filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n # Respect the tool's limit/threshold defaults\n limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n # Build the same hybrid body as your SearchService\n body = {\n \"query\": {\n \"bool\": {\n \"should\": [\n {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": 10, # fixed to match the tool\n \"boost\": 0.7,\n }\n }\n },\n {\n \"multi_match\": {\n \"query\": q,\n \"fields\": [\"text^2\", \"filename^1.5\"],\n \"type\": \"best_fields\",\n \"fuzziness\": \"AUTO\",\n \"boost\": 0.3,\n }\n },\n ],\n \"minimum_should_match\": 1,\n }\n },\n \"aggs\": {\n 
\"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n },\n \"_source\": [\n \"filename\",\n \"mimetype\",\n \"page\",\n \"text\",\n \"source_url\",\n \"owner\",\n \"allowed_users\",\n \"allowed_groups\",\n ],\n \"size\": limit,\n }\n if filter_clauses:\n body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n # top-level min_score (matches your tool)\n body[\"min_score\"] = score_threshold\n\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n def search_documents(self) -> list[Data]:\n \"\"\"Search documents and return results as Data objects.\n\n This is the main interface method that performs the search using the\n configured search_query and returns results in Langflow's Data format.\n\n Returns:\n List of Data objects containing search results with text and metadata\n\n Raises:\n Exception: If search operation fails\n \"\"\"\n try:\n raw = self.search(self.search_query or \"\")\n return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n self.log(self.ingest_data)\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling (auth switch) --------\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Dynamically update component configuration based on field changes.\n\n This method handles real-time UI updates, particularly for authentication\n mode changes that show/hide relevant input fields.\n\n Args:\n build_config: Current component configuration\n field_value: New value for the changed field\n field_name: Name of the field that changed\n\n Returns:\n Updated build configuration with appropriate field visibility\n \"\"\"\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n if is_basic:\n build_config[\"jwt_token\"][\"value\"] = \"\"\n\n return build_config\n\n except (KeyError, ValueError) as e:\n self.log(f\"update_build_config error: {e}\")\n\n return build_config\n" }, "docs_metadata": { "_input_type": "TableInput", - "advanced": true, + "advanced": false, "display_name": "Document Metadata", "dynamic": false, "info": "Additional metadata key-value pairs to be added to all ingested documents. 
Useful for tagging documents with source information, categories, or other custom attributes.", + "input_types": [ + "Data" + ], "is_list": true, "list_add_label": "Add More", "name": "docs_metadata", @@ -798,12 +826,14 @@ { "description": "Key name", "display_name": "Key", + "formatter": "text", "name": "key", "type": "str" }, { "description": "Value of the metadata", "display_name": "Value", + "formatter": "text", "name": "value", "type": "str" } @@ -1285,7 +1315,7 @@ "dragging": false, "id": "OpenSearch-iYfjf", "measured": { - "height": 763, + "height": 848, "width": 320 }, "position": { @@ -1323,7 +1353,7 @@ ], "frozen": false, "icon": "binary", - "last_updated": "2025-09-26T05:15:05.781Z", + "last_updated": "2025-09-26T06:57:25.121Z", "legacy": false, "lf_version": "1.6.0", "metadata": { @@ -1648,7 +1678,7 @@ ], "frozen": false, "icon": "bot", - "last_updated": "2025-09-26T05:15:05.782Z", + "last_updated": "2025-09-26T07:03:41.442Z", "legacy": false, "lf_version": "1.6.0", "metadata": { @@ -1744,7 +1774,7 @@ "advanced": false, "combobox": false, "dialog_inputs": {}, - "display_name": "Model Provider", + "display_name": "Language Model", "dynamic": false, "external_options": { "fields": { @@ -1758,8 +1788,9 @@ } }, "info": "The provider of the language model that the agent will use to generate responses.", - "input_types": [], - "load_from_db": false, + "input_types": [ + "LanguageModel" + ], "name": "agent_llm", "options": [ "Anthropic", @@ -1777,7 +1808,7 @@ "icon": "OpenAI" } ], - "placeholder": "", + "placeholder": "Awaiting model input.", "real_time_refresh": true, "refresh_button": false, "required": false, @@ -1787,25 +1818,7 @@ "tool_mode": false, "trace_as_metadata": true, "type": "str", - "value": "OpenAI" - }, - "api_key": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "OpenAI API Key", - "dynamic": false, - "info": "The OpenAI API Key to use for the OpenAI model.", - "input_types": [], - "load_from_db": true, - "name": "api_key", - "password": true, - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "type": "str", - "value": "OPENAI_API_KEY" + "value": "" }, "code": { "advanced": true, @@ -1893,25 +1906,6 @@ "type": "str", "value": "" }, - "json_mode": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "JSON Mode", - "dynamic": false, - "info": "If True, it will output JSON regardless of passing a schema.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "json_mode", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, "max_iterations": { "_input_type": "IntInput", "advanced": true, @@ -1931,113 +1925,6 @@ "type": "int", "value": 15 }, - "max_retries": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Max Retries", - "dynamic": false, - "info": "The maximum number of retries to make when generating.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "max_retries", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 5 - }, - "max_tokens": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Max Tokens", - "dynamic": false, - "info": "The maximum number of tokens to generate. 
Set to 0 for unlimited tokens.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "max_tokens", - "placeholder": "", - "range_spec": { - "max": 128000, - "min": 0, - "step": 0.1, - "step_type": "float" - }, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "model_kwargs": { - "_input_type": "DictInput", - "advanced": true, - "display_name": "Model Kwargs", - "dynamic": false, - "info": "Additional keyword arguments to pass to the model.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "model_kwargs", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "type": "dict", - "value": {} - }, - "model_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": true, - "dialog_inputs": {}, - "display_name": "Model Name", - "dynamic": false, - "external_options": {}, - "info": "To see the model names, first choose a provider. Then, enter your API key and click the refresh button next to the model name.", - "input_types": [], - "name": "model_name", - "options": [ - "gpt-4o-mini", - "gpt-4o", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4-turbo", - "gpt-4-turbo-preview", - "gpt-4", - "gpt-3.5-turbo", - "gpt-5", - "gpt-5-mini", - "gpt-5-nano", - "gpt-5-chat-latest", - "o1", - "o3-mini", - "o3", - "o3-pro", - "o4-mini", - "o4-mini-high" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": false, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "gpt-4.1" - }, "n_messages": { "_input_type": "IntInput", "advanced": true, @@ -2057,26 +1944,6 @@ "type": "int", "value": 100 }, - "openai_api_base": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "OpenAI API Base", - "dynamic": false, - "info": "The base URL of the OpenAI API. Defaults to https://api.openai.com/v1. You can change this to use other APIs like JinaChat, LocalAI and Prem.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "openai_api_base", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, "output_schema": { "_input_type": "TableInput", "advanced": true, @@ -2140,25 +2007,6 @@ "type": "table", "value": [] }, - "seed": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Seed", - "dynamic": false, - "info": "The seed controls the reproducibility of the job.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "seed", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 1 - }, "system_prompt": { "_input_type": "MultilineInput", "advanced": false, @@ -2184,54 +2032,6 @@ "type": "str", "value": "You are a helpful assistant that can use tools to answer questions and perform tasks." 
}, - "temperature": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Temperature", - "dynamic": false, - "info": "", - "input_types": [], - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "temperature", - "placeholder": "", - "range_spec": { - "max": 1, - "min": 0, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.1 - }, - "timeout": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Timeout", - "dynamic": false, - "info": "The timeout for requests to OpenAI completion API.", - "input_types": [], - "list": false, - "list_add_label": "Add More", - "name": "timeout", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 700 - }, "tools": { "_input_type": "HandleInput", "advanced": false, @@ -2281,14 +2081,14 @@ "dragging": false, "id": "Agent-crjWf", "measured": { - "height": 594, + "height": 429, "width": 320 }, "position": { "x": 1686.5732118555798, "y": 317.94354236557473 }, - "selected": false, + "selected": true, "type": "genericNode" }, { @@ -2417,7 +2217,7 @@ ], "frozen": false, "icon": "brain-circuit", - "last_updated": "2025-09-26T05:15:05.784Z", + "last_updated": "2025-09-26T06:57:25.122Z", "legacy": false, "metadata": { "code_hash": "bb5f8714781b", @@ -2722,7 +2522,7 @@ } ], "viewport": { - "x": -234.8770309457758, + "x": -337.8770309457759, "y": 153.7254076573895, "zoom": 0.6026322796158203 } diff --git a/src/main.py b/src/main.py index c4039b45..10592e47 100644 --- a/src/main.py +++ b/src/main.py @@ -335,6 +335,10 @@ async def _ingest_default_documents_langflow(services, file_paths): settings=None, # Use default ingestion settings jwt_token=effective_jwt, # Use JWT token (anonymous if needed) delete_after_ingest=True, # Clean up after ingestion + owner=None, + owner_name=anonymous_user.name, + owner_email=anonymous_user.email, + connector_type="system_default", ) logger.info( diff --git a/src/services/langflow_file_service.py b/src/services/langflow_file_service.py index 41971226..7ffdd3aa 100644 --- a/src/services/langflow_file_service.py +++ b/src/services/langflow_file_service.py @@ -98,7 +98,7 @@ class LangflowFileService: # Pass metadata via tweaks to OpenSearch component metadata_tweaks = [] - if owner: + if owner or owner is None: metadata_tweaks.append({"key": "owner", "value": owner}) if owner_name: metadata_tweaks.append({"key": "owner_name", "value": owner_name})
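
For reference, a sketch of the docs_metadata behavior the commit message describes: the OpenSearch component now accepts either the manual key/value table rows or a Data object delivered by an upstream component, flattening both into one metadata dict before ingestion. The snippet below is a minimal standalone approximation of that normalization, not code from the patch; the Data class here is a stand-in assumed only for this example (the real component uses lfx.schema.data.Data and logs each step via logger.info).

    from dataclasses import dataclass, field
    from typing import Any


    @dataclass
    class Data:
        # Stand-in for lfx.schema.data.Data: just a payload dict.
        data: dict[str, Any] = field(default_factory=dict)


    def normalize_docs_metadata(docs_metadata: list[Any]) -> dict[str, Any]:
        """Flatten docs_metadata into extra fields applied to every ingested document."""
        additional_metadata: dict[str, Any] = {}
        if not docs_metadata:
            return additional_metadata
        if isinstance(docs_metadata[-1], Data):
            # An upstream component delivered a Data object: use its payload directly.
            additional_metadata.update(docs_metadata[-1].data)
        else:
            # Manual table input: rows shaped like {"key": ..., "value": ...}.
            for item in docs_metadata:
                if isinstance(item, dict) and "key" in item and "value" in item:
                    additional_metadata[item["key"]] = item["value"]
        return additional_metadata


    print(normalize_docs_metadata([{"key": "owner", "value": "user123"}]))
    print(normalize_docs_metadata([Data(data={"connector_type": "system_default"})]))

With the docker-compose.yml changes above, both services build from the local Dockerfiles rather than pulling remote images; a typical invocation is docker compose build openrag-backend openrag-frontend followed by docker compose up.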