From 6c628dceb3f59e4fbecc7a367c93f39805b2466b Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Mon, 29 Sep 2025 14:53:47 -0400 Subject: [PATCH] Update to components and flows --- .DS_Store | Bin 8196 -> 8196 bytes flows/components/ollama_embedding.json | 312 +++--- flows/components/ollama_llm.json | 1368 ++++++++++++----------- flows/components/ollama_llm_text.json | 1365 +++++++++++----------- flows/components/watsonx_embedding.json | 247 +--- flows/components/watsonx_llm.json | 1070 +++++++++--------- flows/components/watsonx_llm_text.json | 1071 +++++++++--------- flows/openrag_ingest_docling.json | 1014 +++++++++++++++-- flows/openrag_nudges.json | 27 +- 9 files changed, 3551 insertions(+), 2923 deletions(-) diff --git a/.DS_Store b/.DS_Store index e98b18b1f3987fb5a012ac82167e178fa2778dad..ca39b6e347a514559b8ebc1e3e1057a1eda59931 100644 GIT binary patch delta 40 mcmZp1XmQx!EzWLcXs)ARY%zJIfWziV;yc-q1tw1tF9HDfUJNw= delta 42 ncmZp1XmQx!EzV(VWT~TIYGF2cm4N-`N#Z-%QA8$B5-$P(40Q~r diff --git a/flows/components/ollama_embedding.json b/flows/components/ollama_embedding.json index 01b83c44..efb12882 100644 --- a/flows/components/ollama_embedding.json +++ b/flows/components/ollama_embedding.json @@ -1,157 +1,171 @@ { "data": { - "id": "OllamaEmbeddings-4ah5Q", - "node": { - "base_classes": [ - "Embeddings" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Generate embeddings using Ollama models.", - "display_name": "Ollama Embeddings", - "documentation": "https://python.langchain.com/docs/integrations/text_embedding/ollama", - "edited": false, - "field_order": [ - "model_name", - "base_url" - ], - "frozen": false, - "icon": "Ollama", - "last_updated": "2025-09-22T20:18:27.128Z", - "legacy": false, - "metadata": { - "code_hash": "0db0f99e91e9", - "dependencies": { - "dependencies": [ - { - "name": "httpx", - "version": "0.28.1" + "edges": [], + "nodes": [ + { + "data": { + "node": { + "template": { + "_type": "Component", + "base_url": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": true, + "placeholder": "", + "show": true, + "name": "base_url", + "value": "http://host.docker.internal:11434", + "display_name": "Ollama Base URL", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "code": { + "type": "code", + "required": true, + "placeholder": "", + "list": false, + "show": true, + "multiline": true, + "value": "from typing import Any\nfrom urllib.parse import urljoin\n\nimport httpx\nfrom langchain_ollama import OllamaEmbeddings\n\nfrom lfx.base.models.model import LCModelComponent\nfrom lfx.base.models.ollama_constants import OLLAMA_EMBEDDING_MODELS, URL_LIST\nfrom lfx.field_typing import Embeddings\nfrom lfx.io import DropdownInput, MessageTextInput, Output\n\nHTTP_STATUS_OK = 200\n\n\nclass OllamaEmbeddingsComponent(LCModelComponent):\n display_name: str = \"Ollama Embeddings\"\n description: str = \"Generate embeddings using Ollama models.\"\n documentation = \"https://python.langchain.com/docs/integrations/text_embedding/ollama\"\n icon = \"Ollama\"\n name = \"OllamaEmbeddings\"\n\n inputs = [\n DropdownInput(\n name=\"model_name\",\n display_name=\"Ollama Model\",\n value=\"\",\n options=[],\n real_time_refresh=True,\n refresh_button=True,\n combobox=True,\n required=True,\n ),\n MessageTextInput(\n 
name=\"base_url\",\n display_name=\"Ollama Base URL\",\n value=\"\",\n required=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Embeddings\", name=\"embeddings\", method=\"build_embeddings\"),\n ]\n\n def build_embeddings(self) -> Embeddings:\n try:\n output = OllamaEmbeddings(model=self.model_name, base_url=self.base_url)\n except Exception as e:\n msg = (\n \"Unable to connect to the Ollama API. \",\n \"Please verify the base URL, ensure the relevant Ollama model is pulled, and try again.\",\n )\n raise ValueError(msg) from e\n return output\n\n async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):\n if field_name in {\"base_url\", \"model_name\"} and not await self.is_valid_ollama_url(field_value):\n # Check if any URL in the list is valid\n valid_url = \"\"\n for url in URL_LIST:\n if await self.is_valid_ollama_url(url):\n valid_url = url\n break\n build_config[\"base_url\"][\"value\"] = valid_url\n if field_name in {\"model_name\", \"base_url\", \"tool_model_enabled\"}:\n if await self.is_valid_ollama_url(self.base_url):\n build_config[\"model_name\"][\"options\"] = await self.get_model(self.base_url)\n elif await self.is_valid_ollama_url(build_config[\"base_url\"].get(\"value\", \"\")):\n build_config[\"model_name\"][\"options\"] = await self.get_model(build_config[\"base_url\"].get(\"value\", \"\"))\n else:\n build_config[\"model_name\"][\"options\"] = []\n\n return build_config\n\n async def get_model(self, base_url_value: str) -> list[str]:\n \"\"\"Get the model names from Ollama.\"\"\"\n model_ids = []\n try:\n url = urljoin(base_url_value, \"/api/tags\")\n async with httpx.AsyncClient() as client:\n response = await client.get(url)\n response.raise_for_status()\n data = response.json()\n\n model_ids = [model[\"name\"] for model in data.get(\"models\", [])]\n # this to ensure that not embedding models are included.\n # not even the base models since models can have 1b 2b etc\n # handles cases when embeddings models have tags like :latest - etc.\n model_ids = [\n model\n for model in model_ids\n if any(model.startswith(f\"{embedding_model}\") for embedding_model in OLLAMA_EMBEDDING_MODELS)\n ]\n\n except (ImportError, ValueError, httpx.RequestError) as e:\n msg = \"Could not get model names from Ollama.\"\n raise ValueError(msg) from e\n\n return model_ids\n\n async def is_valid_ollama_url(self, url: str) -> bool:\n try:\n async with httpx.AsyncClient() as client:\n return (await client.get(f\"{url}/api/tags\")).status_code == HTTP_STATUS_OK\n except httpx.RequestError:\n return False\n", + "fileTypes": [], + "file_path": "", + "password": false, + "name": "code", + "advanced": true, + "dynamic": true, + "info": "", + "load_from_db": false, + "title_case": false + }, + "model_name": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [ + "nomic-embed-text:latest", + "all-minilm:latest" + ], + "options_metadata": [], + "combobox": true, + "dialog_inputs": {}, + "toggle": false, + "required": true, + "placeholder": "", + "show": true, + "name": "model_name", + "value": "", + "display_name": "Ollama Model", + "advanced": false, + "dynamic": false, + "info": "", + "real_time_refresh": true, + "refresh_button": true, + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + } }, - { - "name": "langchain_ollama", - "version": "0.2.1" + "description": "Generate embeddings using Ollama models.", + "icon": "Ollama", + "base_classes": [ + "Embeddings" + ], + "display_name": 
"Ollama Embeddings", + "documentation": "https://python.langchain.com/docs/integrations/text_embedding/ollama", + "minimized": false, + "custom_fields": {}, + "output_types": [], + "pinned": false, + "conditional_paths": [], + "frozen": false, + "outputs": [ + { + "types": [ + "Embeddings" + ], + "selected": "Embeddings", + "name": "embeddings", + "display_name": "Embeddings", + "method": "build_embeddings", + "value": "__UNDEFINED__", + "cache": true, + "required_inputs": null, + "allows_loop": false, + "group_outputs": false, + "options": null, + "tool_mode": true + } + ], + "field_order": [ + "model_name", + "base_url" + ], + "beta": false, + "legacy": false, + "edited": false, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ], + "module": "lfx.components.ollama.ollama_embeddings.OllamaEmbeddingsComponent", + "code_hash": "c41821735548", + "dependencies": { + "total_dependencies": 3, + "dependencies": [ + { + "name": "httpx", + "version": "0.28.1" + }, + { + "name": "langchain_ollama", + "version": "0.2.1" + }, + { + "name": "lfx", + "version": null + } + ] + } }, - { - "name": "langflow", - "version": null - } - ], - "total_dependencies": 3 + "tool_mode": false, + "last_updated": "2025-09-29T18:40:10.242Z", + "official": false + }, + "showNode": true, + "type": "OllamaEmbeddings", + "id": "OllamaEmbeddings-vnNn8" }, - "keywords": [ - "model", - "llm", - "language model", - "large language model" - ], - "module": "langflow.components.ollama.ollama_embeddings.OllamaEmbeddingsComponent" - }, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Embeddings", - "group_outputs": false, - "method": "build_embeddings", - "name": "embeddings", - "options": null, - "required_inputs": null, - "selected": "Embeddings", - "tool_mode": true, - "types": [ - "Embeddings" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "base_url": { - "_input_type": "MessageTextInput", - "advanced": false, - "display_name": "Ollama Base URL", - "dynamic": false, - "info": "", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": true, - "name": "base_url", - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "OLLAMA_BASE_URL" + "id": "OllamaEmbeddings-vnNn8", + "position": { + "x": 0, + "y": 0 }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "from typing import Any\nfrom urllib.parse import urljoin\n\nimport httpx\nfrom langchain_ollama import OllamaEmbeddings\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.ollama_constants import OLLAMA_EMBEDDING_MODELS, URL_LIST\nfrom langflow.field_typing import Embeddings\nfrom langflow.io import DropdownInput, MessageTextInput, Output\n\nHTTP_STATUS_OK = 200\n\n\nclass OllamaEmbeddingsComponent(LCModelComponent):\n display_name: str = \"Ollama Embeddings\"\n description: str = \"Generate embeddings using Ollama models.\"\n documentation = \"https://python.langchain.com/docs/integrations/text_embedding/ollama\"\n icon = \"Ollama\"\n 
name = \"OllamaEmbeddings\"\n\n inputs = [\n DropdownInput(\n name=\"model_name\",\n display_name=\"Ollama Model\",\n value=\"\",\n options=[],\n real_time_refresh=True,\n refresh_button=True,\n combobox=True,\n required=True,\n ),\n MessageTextInput(\n name=\"base_url\",\n display_name=\"Ollama Base URL\",\n value=\"\",\n required=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Embeddings\", name=\"embeddings\", method=\"build_embeddings\"),\n ]\n\n def build_embeddings(self) -> Embeddings:\n try:\n output = OllamaEmbeddings(model=self.model_name, base_url=self.base_url)\n except Exception as e:\n msg = (\n \"Unable to connect to the Ollama API. \",\n \"Please verify the base URL, ensure the relevant Ollama model is pulled, and try again.\",\n )\n raise ValueError(msg) from e\n return output\n\n async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):\n if field_name in {\"base_url\", \"model_name\"} and not await self.is_valid_ollama_url(field_value):\n # Check if any URL in the list is valid\n valid_url = \"\"\n for url in URL_LIST:\n if await self.is_valid_ollama_url(url):\n valid_url = url\n break\n build_config[\"base_url\"][\"value\"] = valid_url\n if field_name in {\"model_name\", \"base_url\", \"tool_model_enabled\"}:\n if await self.is_valid_ollama_url(self.base_url):\n build_config[\"model_name\"][\"options\"] = await self.get_model(self.base_url)\n elif await self.is_valid_ollama_url(build_config[\"base_url\"].get(\"value\", \"\")):\n build_config[\"model_name\"][\"options\"] = await self.get_model(build_config[\"base_url\"].get(\"value\", \"\"))\n else:\n build_config[\"model_name\"][\"options\"] = []\n\n return build_config\n\n async def get_model(self, base_url_value: str) -> list[str]:\n \"\"\"Get the model names from Ollama.\"\"\"\n model_ids = []\n try:\n url = urljoin(base_url_value, \"/api/tags\")\n async with httpx.AsyncClient() as client:\n response = await client.get(url)\n response.raise_for_status()\n data = response.json()\n\n model_ids = [model[\"name\"] for model in data.get(\"models\", [])]\n # this to ensure that not embedding models are included.\n # not even the base models since models can have 1b 2b etc\n # handles cases when embeddings models have tags like :latest - etc.\n model_ids = [\n model\n for model in model_ids\n if any(model.startswith(f\"{embedding_model}\") for embedding_model in OLLAMA_EMBEDDING_MODELS)\n ]\n\n except (ImportError, ValueError, httpx.RequestError) as e:\n msg = \"Could not get model names from Ollama.\"\n raise ValueError(msg) from e\n\n return model_ids\n\n async def is_valid_ollama_url(self, url: str) -> bool:\n try:\n async with httpx.AsyncClient() as client:\n return (await client.get(f\"{url}/api/tags\")).status_code == HTTP_STATUS_OK\n except httpx.RequestError:\n return False\n" - }, - "model_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": true, - "dialog_inputs": {}, - "display_name": "Ollama Model", - "dynamic": false, - "info": "", - "name": "model_name", - "options": [ - "all-minilm:latest" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "all-minilm:latest" - } - }, - "tool_mode": false - }, - "showNode": true, - "type": "OllamaEmbeddings" + "type": "genericNode" + } + ], + "viewport": { + "x": 1, + "y": 1, + "zoom": 1 
+ } }, - "dragging": false, - "id": "OllamaEmbeddings-4ah5Q", - "measured": { - "height": 286, - "width": 320 - }, - "position": { - "x": 282.29416840859585, - "y": 279.4218065717267 - }, - "selected": false, - "type": "genericNode" + "description": "Generate embeddings using Ollama models.", + "name": "Ollama Embeddings", + "id": "OllamaEmbeddings-vnNn8", + "is_component": true, + "last_tested_version": "1.6.0" } \ No newline at end of file diff --git a/flows/components/ollama_llm.json b/flows/components/ollama_llm.json index 0edf7f13..44e9d347 100644 --- a/flows/components/ollama_llm.json +++ b/flows/components/ollama_llm.json @@ -1,687 +1,699 @@ { "data": { - "id": "OllamaModel-eCsJx", - "node": { - "base_classes": [ - "LanguageModel", - "Message" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Generate text using Ollama Local LLMs.", - "display_name": "Ollama", - "documentation": "", - "edited": false, - "field_order": [ - "base_url", - "model_name", - "temperature", - "format", - "metadata", - "mirostat", - "mirostat_eta", - "mirostat_tau", - "num_ctx", - "num_gpu", - "num_thread", - "repeat_last_n", - "repeat_penalty", - "tfs_z", - "timeout", - "top_k", - "top_p", - "verbose", - "tags", - "stop_tokens", - "system", - "tool_model_enabled", - "template", - "input_value", - "system_message", - "stream" - ], - "frozen": false, - "icon": "Ollama", - "last_updated": "2025-09-22T20:14:45.057Z", - "legacy": false, - "metadata": { - "code_hash": "af399d429d23", - "dependencies": { - "dependencies": [ - { - "name": "httpx", - "version": "0.28.1" + "edges": [], + "nodes": [ + { + "data": { + "node": { + "template": { + "_type": "Component", + "base_url": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "base_url", + "value": "", + "display_name": "Base URL", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Endpoint of the Ollama API.", + "real_time_refresh": true, + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "code": { + "type": "code", + "required": true, + "placeholder": "", + "list": false, + "show": true, + "multiline": true, + "value": "import asyncio\nfrom typing import Any\nfrom urllib.parse import urljoin\n\nimport httpx\nfrom langchain_ollama import ChatOllama\n\nfrom lfx.base.models.model import LCModelComponent\nfrom lfx.base.models.ollama_constants import URL_LIST\nfrom lfx.field_typing import LanguageModel\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SliderInput\nfrom lfx.log.logger import logger\n\nHTTP_STATUS_OK = 200\n\n\nclass ChatOllamaComponent(LCModelComponent):\n display_name = \"Ollama\"\n description = \"Generate text using Ollama Local LLMs.\"\n icon = \"Ollama\"\n name = \"OllamaModel\"\n\n # Define constants for JSON keys\n JSON_MODELS_KEY = \"models\"\n JSON_NAME_KEY = \"name\"\n JSON_CAPABILITIES_KEY = \"capabilities\"\n DESIRED_CAPABILITY = \"completion\"\n TOOL_CALLING_CAPABILITY = \"tools\"\n\n inputs = [\n MessageTextInput(\n name=\"base_url\",\n display_name=\"Base URL\",\n info=\"Endpoint of the Ollama API.\",\n value=\"\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n info=\"Refer to 
https://ollama.com/library for more models.\",\n refresh_button=True,\n real_time_refresh=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n MessageTextInput(\n name=\"format\", display_name=\"Format\", info=\"Specify the format of the output (e.g., json).\", advanced=True\n ),\n DictInput(name=\"metadata\", display_name=\"Metadata\", info=\"Metadata to add to the run trace.\", advanced=True),\n DropdownInput(\n name=\"mirostat\",\n display_name=\"Mirostat\",\n options=[\"Disabled\", \"Mirostat\", \"Mirostat 2.0\"],\n info=\"Enable/disable Mirostat sampling for controlling perplexity.\",\n value=\"Disabled\",\n advanced=True,\n real_time_refresh=True,\n ),\n FloatInput(\n name=\"mirostat_eta\",\n display_name=\"Mirostat Eta\",\n info=\"Learning rate for Mirostat algorithm. (Default: 0.1)\",\n advanced=True,\n ),\n FloatInput(\n name=\"mirostat_tau\",\n display_name=\"Mirostat Tau\",\n info=\"Controls the balance between coherence and diversity of the output. (Default: 5.0)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_ctx\",\n display_name=\"Context Window Size\",\n info=\"Size of the context window for generating tokens. (Default: 2048)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_gpu\",\n display_name=\"Number of GPUs\",\n info=\"Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_thread\",\n display_name=\"Number of Threads\",\n info=\"Number of threads to use during computation. (Default: detected for optimal performance)\",\n advanced=True,\n ),\n IntInput(\n name=\"repeat_last_n\",\n display_name=\"Repeat Last N\",\n info=\"How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\",\n advanced=True,\n ),\n FloatInput(\n name=\"repeat_penalty\",\n display_name=\"Repeat Penalty\",\n info=\"Penalty for repetitions in generated text. (Default: 1.1)\",\n advanced=True,\n ),\n FloatInput(name=\"tfs_z\", display_name=\"TFS Z\", info=\"Tail free sampling value. (Default: 1)\", advanced=True),\n IntInput(name=\"timeout\", display_name=\"Timeout\", info=\"Timeout for the request stream.\", advanced=True),\n IntInput(\n name=\"top_k\", display_name=\"Top K\", info=\"Limits token selection to top K. (Default: 40)\", advanced=True\n ),\n FloatInput(name=\"top_p\", display_name=\"Top P\", info=\"Works together with top-k. 
(Default: 0.9)\", advanced=True),\n BoolInput(name=\"verbose\", display_name=\"Verbose\", info=\"Whether to print out response text.\", advanced=True),\n MessageTextInput(\n name=\"tags\",\n display_name=\"Tags\",\n info=\"Comma-separated list of tags to add to the run trace.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"stop_tokens\",\n display_name=\"Stop Tokens\",\n info=\"Comma-separated list of tokens to signal the model to stop generating text.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"system\", display_name=\"System\", info=\"System to use for generating text.\", advanced=True\n ),\n BoolInput(\n name=\"tool_model_enabled\",\n display_name=\"Tool Model Enabled\",\n info=\"Whether to enable tool calling in the model.\",\n value=True,\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"template\", display_name=\"Template\", info=\"Template to use for generating text.\", advanced=True\n ),\n *LCModelComponent.get_base_inputs(),\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n # Mapping mirostat settings to their corresponding values\n mirostat_options = {\"Mirostat\": 1, \"Mirostat 2.0\": 2}\n\n # Default to 0 for 'Disabled'\n mirostat_value = mirostat_options.get(self.mirostat, 0)\n\n # Set mirostat_eta and mirostat_tau to None if mirostat is disabled\n if mirostat_value == 0:\n mirostat_eta = None\n mirostat_tau = None\n else:\n mirostat_eta = self.mirostat_eta\n mirostat_tau = self.mirostat_tau\n\n # Mapping system settings to their corresponding values\n llm_params = {\n \"base_url\": self.base_url,\n \"model\": self.model_name,\n \"mirostat\": mirostat_value,\n \"format\": self.format,\n \"metadata\": self.metadata,\n \"tags\": self.tags.split(\",\") if self.tags else None,\n \"mirostat_eta\": mirostat_eta,\n \"mirostat_tau\": mirostat_tau,\n \"num_ctx\": self.num_ctx or None,\n \"num_gpu\": self.num_gpu or None,\n \"num_thread\": self.num_thread or None,\n \"repeat_last_n\": self.repeat_last_n or None,\n \"repeat_penalty\": self.repeat_penalty or None,\n \"temperature\": self.temperature or None,\n \"stop\": self.stop_tokens.split(\",\") if self.stop_tokens else None,\n \"system\": self.system,\n \"tfs_z\": self.tfs_z or None,\n \"timeout\": self.timeout or None,\n \"top_k\": self.top_k or None,\n \"top_p\": self.top_p or None,\n \"verbose\": self.verbose,\n \"template\": self.template,\n }\n\n # Remove parameters with None values\n llm_params = {k: v for k, v in llm_params.items() if v is not None}\n\n try:\n output = ChatOllama(**llm_params)\n except Exception as e:\n msg = (\n \"Unable to connect to the Ollama API. 
\",\n \"Please verify the base URL, ensure the relevant Ollama model is pulled, and try again.\",\n )\n raise ValueError(msg) from e\n\n return output\n\n async def is_valid_ollama_url(self, url: str) -> bool:\n try:\n async with httpx.AsyncClient() as client:\n return (await client.get(urljoin(url, \"api/tags\"))).status_code == HTTP_STATUS_OK\n except httpx.RequestError:\n return False\n\n async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):\n if field_name == \"mirostat\":\n if field_value == \"Disabled\":\n build_config[\"mirostat_eta\"][\"advanced\"] = True\n build_config[\"mirostat_tau\"][\"advanced\"] = True\n build_config[\"mirostat_eta\"][\"value\"] = None\n build_config[\"mirostat_tau\"][\"value\"] = None\n\n else:\n build_config[\"mirostat_eta\"][\"advanced\"] = False\n build_config[\"mirostat_tau\"][\"advanced\"] = False\n\n if field_value == \"Mirostat 2.0\":\n build_config[\"mirostat_eta\"][\"value\"] = 0.2\n build_config[\"mirostat_tau\"][\"value\"] = 10\n else:\n build_config[\"mirostat_eta\"][\"value\"] = 0.1\n build_config[\"mirostat_tau\"][\"value\"] = 5\n\n if field_name in {\"base_url\", \"model_name\"}:\n if build_config[\"base_url\"].get(\"load_from_db\", False):\n base_url_value = await self.get_variables(build_config[\"base_url\"].get(\"value\", \"\"), \"base_url\")\n else:\n base_url_value = build_config[\"base_url\"].get(\"value\", \"\")\n\n if not await self.is_valid_ollama_url(base_url_value):\n # Check if any URL in the list is valid\n valid_url = \"\"\n check_urls = URL_LIST\n if self.base_url:\n check_urls = [self.base_url, *URL_LIST]\n for url in check_urls:\n if await self.is_valid_ollama_url(url):\n valid_url = url\n break\n if valid_url != \"\":\n build_config[\"base_url\"][\"value\"] = valid_url\n else:\n msg = \"No valid Ollama URL found.\"\n raise ValueError(msg)\n if field_name in {\"model_name\", \"base_url\", \"tool_model_enabled\"}:\n if await self.is_valid_ollama_url(self.base_url):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n self.base_url, tool_model_enabled=tool_model_enabled\n )\n elif await self.is_valid_ollama_url(build_config[\"base_url\"].get(\"value\", \"\")):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n build_config[\"base_url\"].get(\"value\", \"\"), tool_model_enabled=tool_model_enabled\n )\n else:\n build_config[\"model_name\"][\"options\"] = []\n if field_name == \"keep_alive_flag\":\n if field_value == \"Keep\":\n build_config[\"keep_alive\"][\"value\"] = \"-1\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n elif field_value == \"Immediately\":\n build_config[\"keep_alive\"][\"value\"] = \"0\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n else:\n build_config[\"keep_alive\"][\"advanced\"] = False\n\n return build_config\n\n async def get_models(self, base_url_value: str, *, tool_model_enabled: bool | None = None) -> list[str]:\n \"\"\"Fetches a list of models from the Ollama API that do not have the \"embedding\" capability.\n\n Args:\n base_url_value (str): The base URL of the Ollama API.\n tool_model_enabled (bool | None, optional): If True, filters the models further to include\n only those that support tool calling. 
Defaults to None.\n\n Returns:\n list[str]: A list of model names that do not have the \"embedding\" capability. If\n `tool_model_enabled` is True, only models supporting tool calling are included.\n\n Raises:\n ValueError: If there is an issue with the API request or response, or if the model\n names cannot be retrieved.\n \"\"\"\n try:\n # Normalize the base URL to avoid the repeated \"/\" at the end\n base_url = base_url_value.rstrip(\"/\") + \"/\"\n\n # Ollama REST API to return models\n tags_url = urljoin(base_url, \"api/tags\")\n\n # Ollama REST API to return model capabilities\n show_url = urljoin(base_url, \"api/show\")\n\n async with httpx.AsyncClient() as client:\n # Fetch available models\n tags_response = await client.get(tags_url)\n tags_response.raise_for_status()\n models = tags_response.json()\n if asyncio.iscoroutine(models):\n models = await models\n await logger.adebug(f\"Available models: {models}\")\n\n # Filter models that are NOT embedding models\n model_ids = []\n for model in models[self.JSON_MODELS_KEY]:\n model_name = model[self.JSON_NAME_KEY]\n await logger.adebug(f\"Checking model: {model_name}\")\n\n payload = {\"model\": model_name}\n show_response = await client.post(show_url, json=payload)\n show_response.raise_for_status()\n json_data = show_response.json()\n if asyncio.iscoroutine(json_data):\n json_data = await json_data\n capabilities = json_data.get(self.JSON_CAPABILITIES_KEY, [])\n await logger.adebug(f\"Model: {model_name}, Capabilities: {capabilities}\")\n\n if self.DESIRED_CAPABILITY in capabilities and (\n not tool_model_enabled or self.TOOL_CALLING_CAPABILITY in capabilities\n ):\n model_ids.append(model_name)\n\n except (httpx.RequestError, ValueError) as e:\n msg = \"Could not get model names from Ollama.\"\n raise ValueError(msg) from e\n\n return model_ids\n", + "fileTypes": [], + "file_path": "", + "password": false, + "name": "code", + "advanced": true, + "dynamic": true, + "info": "", + "load_from_db": false, + "title_case": false + }, + "format": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "format", + "value": "", + "display_name": "Format", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Specify the format of the output (e.g., json).", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "input_value": { + "trace_as_input": true, + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "input_value", + "value": "", + "display_name": "Input", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "", + "title_case": false, + "type": "str", + "_input_type": "MessageInput" + }, + "metadata": { + "tool_mode": false, + "trace_as_input": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "metadata", + "value": {}, + "display_name": "Metadata", + "advanced": true, + "dynamic": false, + "info": "Metadata to add to the run trace.", + "title_case": false, + "type": "dict", + "_input_type": "DictInput" + }, + "mirostat": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [ + "Disabled", + "Mirostat", + "Mirostat 2.0" + ], + "options_metadata": [], + 
"combobox": false, + "dialog_inputs": {}, + "toggle": false, + "required": false, + "placeholder": "", + "show": true, + "name": "mirostat", + "value": "Disabled", + "display_name": "Mirostat", + "advanced": true, + "dynamic": false, + "info": "Enable/disable Mirostat sampling for controlling perplexity.", + "real_time_refresh": true, + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + }, + "mirostat_eta": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "mirostat_eta", + "value": "", + "display_name": "Mirostat Eta", + "advanced": true, + "dynamic": false, + "info": "Learning rate for Mirostat algorithm. (Default: 0.1)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "mirostat_tau": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "mirostat_tau", + "value": "", + "display_name": "Mirostat Tau", + "advanced": true, + "dynamic": false, + "info": "Controls the balance between coherence and diversity of the output. (Default: 5.0)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "model_name": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [], + "options_metadata": [], + "combobox": false, + "dialog_inputs": {}, + "toggle": false, + "required": false, + "placeholder": "", + "show": true, + "name": "model_name", + "value": "", + "display_name": "Model Name", + "advanced": false, + "dynamic": false, + "info": "Refer to https://ollama.com/library for more models.", + "real_time_refresh": true, + "refresh_button": true, + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + }, + "num_ctx": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "num_ctx", + "value": "", + "display_name": "Context Window Size", + "advanced": true, + "dynamic": false, + "info": "Size of the context window for generating tokens. (Default: 2048)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "num_gpu": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "num_gpu", + "value": "", + "display_name": "Number of GPUs", + "advanced": true, + "dynamic": false, + "info": "Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "num_thread": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "num_thread", + "value": "", + "display_name": "Number of Threads", + "advanced": true, + "dynamic": false, + "info": "Number of threads to use during computation. 
(Default: detected for optimal performance)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "repeat_last_n": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "repeat_last_n", + "value": "", + "display_name": "Repeat Last N", + "advanced": true, + "dynamic": false, + "info": "How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "repeat_penalty": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "repeat_penalty", + "value": "", + "display_name": "Repeat Penalty", + "advanced": true, + "dynamic": false, + "info": "Penalty for repetitions in generated text. (Default: 1.1)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "stop_tokens": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "stop_tokens", + "value": "", + "display_name": "Stop Tokens", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Comma-separated list of tokens to signal the model to stop generating text.", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "stream": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "stream", + "value": false, + "display_name": "Stream", + "advanced": true, + "dynamic": false, + "info": "Stream the response from the model. 
Streaming works only in Chat.", + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + }, + "system": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "system", + "value": "", + "display_name": "System", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "System to use for generating text.", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "system_message": { + "tool_mode": false, + "trace_as_input": true, + "multiline": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "system_message", + "value": "", + "display_name": "System Message", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "System message to pass to the model.", + "title_case": false, + "copy_field": false, + "type": "str", + "_input_type": "MultilineInput" + }, + "tags": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "tags", + "value": "", + "display_name": "Tags", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Comma-separated list of tags to add to the run trace.", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "temperature": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": 0, + "max": 1, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "temperature", + "value": 0.1, + "display_name": "Temperature", + "advanced": true, + "dynamic": false, + "info": "", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "template": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "template", + "value": "", + "display_name": "Template", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Template to use for generating text.", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "tfs_z": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "tfs_z", + "value": "", + "display_name": "TFS Z", + "advanced": true, + "dynamic": false, + "info": "Tail free sampling value. 
(Default: 1)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "timeout": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "timeout", + "value": "", + "display_name": "Timeout", + "advanced": true, + "dynamic": false, + "info": "Timeout for the request stream.", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "tool_model_enabled": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "tool_model_enabled", + "value": true, + "display_name": "Tool Model Enabled", + "advanced": false, + "dynamic": false, + "info": "Whether to enable tool calling in the model.", + "real_time_refresh": true, + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + }, + "top_k": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "top_k", + "value": "", + "display_name": "Top K", + "advanced": true, + "dynamic": false, + "info": "Limits token selection to top K. (Default: 40)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "top_p": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "top_p", + "value": "", + "display_name": "Top P", + "advanced": true, + "dynamic": false, + "info": "Works together with top-k. (Default: 0.9)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "verbose": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "verbose", + "value": false, + "display_name": "Verbose", + "advanced": true, + "dynamic": false, + "info": "Whether to print out response text.", + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + } }, - { - "name": "langchain_ollama", - "version": "0.2.1" + "description": "Generate text using Ollama Local LLMs.", + "icon": "Ollama", + "base_classes": [ + "LanguageModel", + "Message" + ], + "display_name": "Ollama", + "documentation": "", + "minimized": false, + "custom_fields": {}, + "output_types": [], + "pinned": false, + "conditional_paths": [], + "frozen": false, + "outputs": [ + { + "types": [ + "Message" + ], + "name": "text_output", + "display_name": "Model Response", + "method": "text_response", + "value": "__UNDEFINED__", + "cache": true, + "required_inputs": null, + "allows_loop": false, + "group_outputs": false, + "options": null, + "tool_mode": true + }, + { + "types": [ + "LanguageModel" + ], + "selected": "LanguageModel", + "name": "model_output", + "display_name": "Language Model", + "method": "build_model", + "value": "__UNDEFINED__", + "cache": true, + "required_inputs": null, + "allows_loop": false, + "group_outputs": false, + "options": null, + "tool_mode": true + } + ], + "field_order": [ + "base_url", + "model_name", + "temperature", + "format", + "metadata", + "mirostat", + "mirostat_eta", + "mirostat_tau", + "num_ctx", + "num_gpu", + "num_thread", + "repeat_last_n", + "repeat_penalty", + "tfs_z", + "timeout", + "top_k", + "top_p", + "verbose", + "tags", + "stop_tokens", + "system", + "tool_model_enabled", + "template", + 
"input_value", + "system_message", + "stream" + ], + "beta": false, + "legacy": false, + "edited": false, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ], + "module": "lfx.components.ollama.ollama.ChatOllamaComponent", + "code_hash": "54de3b5da388", + "dependencies": { + "total_dependencies": 3, + "dependencies": [ + { + "name": "httpx", + "version": "0.28.1" + }, + { + "name": "langchain_ollama", + "version": "0.2.1" + }, + { + "name": "lfx", + "version": null + } + ] + } }, - { - "name": "langflow", - "version": null - } - ], - "total_dependencies": 3 - }, - "keywords": [ - "model", - "llm", - "language model", - "large language model" - ], - "module": "langflow.components.ollama.ollama.ChatOllamaComponent" - }, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Model Response", - "group_outputs": false, - "method": "text_response", - "name": "text_output", - "options": null, - "required_inputs": null, - "tool_mode": true, - "types": [ - "Message" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Language Model", - "group_outputs": false, - "method": "build_model", - "name": "model_output", - "options": null, - "required_inputs": null, - "selected": "LanguageModel", - "tool_mode": true, - "types": [ - "LanguageModel" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "base_url": { - "_input_type": "MessageTextInput", - "advanced": false, - "display_name": "Base URL", - "dynamic": false, - "info": "Endpoint of the Ollama API.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": true, - "name": "base_url", - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "OLLAMA_BASE_URL" - }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "import asyncio\nfrom typing import Any\nfrom urllib.parse import urljoin\n\nimport httpx\nfrom langchain_ollama import ChatOllama\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.ollama_constants import URL_LIST\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SliderInput\nfrom langflow.logging import logger\n\nHTTP_STATUS_OK = 200\n\n\nclass ChatOllamaComponent(LCModelComponent):\n display_name = \"Ollama\"\n description = \"Generate text using Ollama Local LLMs.\"\n icon = \"Ollama\"\n name = \"OllamaModel\"\n\n # Define constants for JSON keys\n JSON_MODELS_KEY = \"models\"\n JSON_NAME_KEY = \"name\"\n JSON_CAPABILITIES_KEY = \"capabilities\"\n DESIRED_CAPABILITY = \"completion\"\n TOOL_CALLING_CAPABILITY = \"tools\"\n\n inputs = [\n MessageTextInput(\n name=\"base_url\",\n display_name=\"Base URL\",\n info=\"Endpoint of the Ollama API.\",\n value=\"\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n 
info=\"Refer to https://ollama.com/library for more models.\",\n refresh_button=True,\n real_time_refresh=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n MessageTextInput(\n name=\"format\", display_name=\"Format\", info=\"Specify the format of the output (e.g., json).\", advanced=True\n ),\n DictInput(name=\"metadata\", display_name=\"Metadata\", info=\"Metadata to add to the run trace.\", advanced=True),\n DropdownInput(\n name=\"mirostat\",\n display_name=\"Mirostat\",\n options=[\"Disabled\", \"Mirostat\", \"Mirostat 2.0\"],\n info=\"Enable/disable Mirostat sampling for controlling perplexity.\",\n value=\"Disabled\",\n advanced=True,\n real_time_refresh=True,\n ),\n FloatInput(\n name=\"mirostat_eta\",\n display_name=\"Mirostat Eta\",\n info=\"Learning rate for Mirostat algorithm. (Default: 0.1)\",\n advanced=True,\n ),\n FloatInput(\n name=\"mirostat_tau\",\n display_name=\"Mirostat Tau\",\n info=\"Controls the balance between coherence and diversity of the output. (Default: 5.0)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_ctx\",\n display_name=\"Context Window Size\",\n info=\"Size of the context window for generating tokens. (Default: 2048)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_gpu\",\n display_name=\"Number of GPUs\",\n info=\"Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_thread\",\n display_name=\"Number of Threads\",\n info=\"Number of threads to use during computation. (Default: detected for optimal performance)\",\n advanced=True,\n ),\n IntInput(\n name=\"repeat_last_n\",\n display_name=\"Repeat Last N\",\n info=\"How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\",\n advanced=True,\n ),\n FloatInput(\n name=\"repeat_penalty\",\n display_name=\"Repeat Penalty\",\n info=\"Penalty for repetitions in generated text. (Default: 1.1)\",\n advanced=True,\n ),\n FloatInput(name=\"tfs_z\", display_name=\"TFS Z\", info=\"Tail free sampling value. (Default: 1)\", advanced=True),\n IntInput(name=\"timeout\", display_name=\"Timeout\", info=\"Timeout for the request stream.\", advanced=True),\n IntInput(\n name=\"top_k\", display_name=\"Top K\", info=\"Limits token selection to top K. (Default: 40)\", advanced=True\n ),\n FloatInput(name=\"top_p\", display_name=\"Top P\", info=\"Works together with top-k. 
(Default: 0.9)\", advanced=True),\n BoolInput(name=\"verbose\", display_name=\"Verbose\", info=\"Whether to print out response text.\", advanced=True),\n MessageTextInput(\n name=\"tags\",\n display_name=\"Tags\",\n info=\"Comma-separated list of tags to add to the run trace.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"stop_tokens\",\n display_name=\"Stop Tokens\",\n info=\"Comma-separated list of tokens to signal the model to stop generating text.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"system\", display_name=\"System\", info=\"System to use for generating text.\", advanced=True\n ),\n BoolInput(\n name=\"tool_model_enabled\",\n display_name=\"Tool Model Enabled\",\n info=\"Whether to enable tool calling in the model.\",\n value=True,\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"template\", display_name=\"Template\", info=\"Template to use for generating text.\", advanced=True\n ),\n *LCModelComponent._base_inputs,\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n # Mapping mirostat settings to their corresponding values\n mirostat_options = {\"Mirostat\": 1, \"Mirostat 2.0\": 2}\n\n # Default to 0 for 'Disabled'\n mirostat_value = mirostat_options.get(self.mirostat, 0)\n\n # Set mirostat_eta and mirostat_tau to None if mirostat is disabled\n if mirostat_value == 0:\n mirostat_eta = None\n mirostat_tau = None\n else:\n mirostat_eta = self.mirostat_eta\n mirostat_tau = self.mirostat_tau\n\n # Mapping system settings to their corresponding values\n llm_params = {\n \"base_url\": self.base_url,\n \"model\": self.model_name,\n \"mirostat\": mirostat_value,\n \"format\": self.format,\n \"metadata\": self.metadata,\n \"tags\": self.tags.split(\",\") if self.tags else None,\n \"mirostat_eta\": mirostat_eta,\n \"mirostat_tau\": mirostat_tau,\n \"num_ctx\": self.num_ctx or None,\n \"num_gpu\": self.num_gpu or None,\n \"num_thread\": self.num_thread or None,\n \"repeat_last_n\": self.repeat_last_n or None,\n \"repeat_penalty\": self.repeat_penalty or None,\n \"temperature\": self.temperature or None,\n \"stop\": self.stop_tokens.split(\",\") if self.stop_tokens else None,\n \"system\": self.system,\n \"tfs_z\": self.tfs_z or None,\n \"timeout\": self.timeout or None,\n \"top_k\": self.top_k or None,\n \"top_p\": self.top_p or None,\n \"verbose\": self.verbose,\n \"template\": self.template,\n }\n\n # Remove parameters with None values\n llm_params = {k: v for k, v in llm_params.items() if v is not None}\n\n try:\n output = ChatOllama(**llm_params)\n except Exception as e:\n msg = (\n \"Unable to connect to the Ollama API. 
\",\n \"Please verify the base URL, ensure the relevant Ollama model is pulled, and try again.\",\n )\n raise ValueError(msg) from e\n\n return output\n\n async def is_valid_ollama_url(self, url: str) -> bool:\n try:\n async with httpx.AsyncClient() as client:\n return (await client.get(urljoin(url, \"api/tags\"))).status_code == HTTP_STATUS_OK\n except httpx.RequestError:\n return False\n\n async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):\n if field_name == \"mirostat\":\n if field_value == \"Disabled\":\n build_config[\"mirostat_eta\"][\"advanced\"] = True\n build_config[\"mirostat_tau\"][\"advanced\"] = True\n build_config[\"mirostat_eta\"][\"value\"] = None\n build_config[\"mirostat_tau\"][\"value\"] = None\n\n else:\n build_config[\"mirostat_eta\"][\"advanced\"] = False\n build_config[\"mirostat_tau\"][\"advanced\"] = False\n\n if field_value == \"Mirostat 2.0\":\n build_config[\"mirostat_eta\"][\"value\"] = 0.2\n build_config[\"mirostat_tau\"][\"value\"] = 10\n else:\n build_config[\"mirostat_eta\"][\"value\"] = 0.1\n build_config[\"mirostat_tau\"][\"value\"] = 5\n\n if field_name in {\"base_url\", \"model_name\"}:\n if build_config[\"base_url\"].get(\"load_from_db\", False):\n base_url_value = await self.get_variables(build_config[\"base_url\"].get(\"value\", \"\"), \"base_url\")\n else:\n base_url_value = build_config[\"base_url\"].get(\"value\", \"\")\n\n if not await self.is_valid_ollama_url(base_url_value):\n # Check if any URL in the list is valid\n valid_url = \"\"\n check_urls = URL_LIST\n if self.base_url:\n check_urls = [self.base_url, *URL_LIST]\n for url in check_urls:\n if await self.is_valid_ollama_url(url):\n valid_url = url\n break\n if valid_url != \"\":\n build_config[\"base_url\"][\"value\"] = valid_url\n else:\n msg = \"No valid Ollama URL found.\"\n raise ValueError(msg)\n if field_name in {\"model_name\", \"base_url\", \"tool_model_enabled\"}:\n if await self.is_valid_ollama_url(self.base_url):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n self.base_url, tool_model_enabled=tool_model_enabled\n )\n elif await self.is_valid_ollama_url(build_config[\"base_url\"].get(\"value\", \"\")):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n build_config[\"base_url\"].get(\"value\", \"\"), tool_model_enabled=tool_model_enabled\n )\n else:\n build_config[\"model_name\"][\"options\"] = []\n if field_name == \"keep_alive_flag\":\n if field_value == \"Keep\":\n build_config[\"keep_alive\"][\"value\"] = \"-1\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n elif field_value == \"Immediately\":\n build_config[\"keep_alive\"][\"value\"] = \"0\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n else:\n build_config[\"keep_alive\"][\"advanced\"] = False\n\n return build_config\n\n async def get_models(self, base_url_value: str, *, tool_model_enabled: bool | None = None) -> list[str]:\n \"\"\"Fetches a list of models from the Ollama API that do not have the \"embedding\" capability.\n\n Args:\n base_url_value (str): The base URL of the Ollama API.\n tool_model_enabled (bool | None, optional): If True, filters the models further to include\n only those that support tool calling. 
Defaults to None.\n\n Returns:\n list[str]: A list of model names that do not have the \"embedding\" capability. If\n `tool_model_enabled` is True, only models supporting tool calling are included.\n\n Raises:\n ValueError: If there is an issue with the API request or response, or if the model\n names cannot be retrieved.\n \"\"\"\n try:\n # Normalize the base URL to avoid the repeated \"/\" at the end\n base_url = base_url_value.rstrip(\"/\") + \"/\"\n\n # Ollama REST API to return models\n tags_url = urljoin(base_url, \"api/tags\")\n\n # Ollama REST API to return model capabilities\n show_url = urljoin(base_url, \"api/show\")\n\n async with httpx.AsyncClient() as client:\n # Fetch available models\n tags_response = await client.get(tags_url)\n tags_response.raise_for_status()\n models = tags_response.json()\n if asyncio.iscoroutine(models):\n models = await models\n await logger.adebug(f\"Available models: {models}\")\n\n # Filter models that are NOT embedding models\n model_ids = []\n for model in models[self.JSON_MODELS_KEY]:\n model_name = model[self.JSON_NAME_KEY]\n await logger.adebug(f\"Checking model: {model_name}\")\n\n payload = {\"model\": model_name}\n show_response = await client.post(show_url, json=payload)\n show_response.raise_for_status()\n json_data = show_response.json()\n if asyncio.iscoroutine(json_data):\n json_data = await json_data\n capabilities = json_data.get(self.JSON_CAPABILITIES_KEY, [])\n await logger.adebug(f\"Model: {model_name}, Capabilities: {capabilities}\")\n\n if self.DESIRED_CAPABILITY in capabilities and (\n not tool_model_enabled or self.TOOL_CALLING_CAPABILITY in capabilities\n ):\n model_ids.append(model_name)\n\n except (httpx.RequestError, ValueError) as e:\n msg = \"Could not get model names from Ollama.\"\n raise ValueError(msg) from e\n\n return model_ids\n" - }, - "format": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "Format", - "dynamic": false, - "info": "Specify the format of the output (e.g., json).", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "format", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "input_value": { - "_input_type": "MessageInput", - "advanced": false, - "display_name": "Input", - "dynamic": false, - "info": "", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "input_value", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "metadata": { - "_input_type": "DictInput", - "advanced": true, - "display_name": "Metadata", - "dynamic": false, - "info": "Metadata to add to the run trace.", - "list": false, - "list_add_label": "Add More", - "name": "metadata", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "type": "dict", - "value": {} - }, - "mirostat": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Mirostat", - "dynamic": false, - "info": "Enable/disable Mirostat sampling for controlling perplexity.", - "name": "mirostat", - "options": [ - "Disabled", - "Mirostat", - "Mirostat 2.0" - ], - 
"options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "Disabled" - }, - "mirostat_eta": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Mirostat Eta", - "dynamic": false, - "info": "Learning rate for Mirostat algorithm. (Default: 0.1)", - "list": false, - "list_add_label": "Add More", - "name": "mirostat_eta", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "mirostat_tau": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Mirostat Tau", - "dynamic": false, - "info": "Controls the balance between coherence and diversity of the output. (Default: 5.0)", - "list": false, - "list_add_label": "Add More", - "name": "mirostat_tau", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "model_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Name", - "dynamic": false, - "info": "Refer to https://ollama.com/library for more models.", - "name": "model_name", - "options": [ - "qwen3:4b" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "qwen3:4b" - }, - "num_ctx": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Context Window Size", - "dynamic": false, - "info": "Size of the context window for generating tokens. (Default: 2048)", - "list": false, - "list_add_label": "Add More", - "name": "num_ctx", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "num_gpu": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Number of GPUs", - "dynamic": false, - "info": "Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)", - "list": false, - "list_add_label": "Add More", - "name": "num_gpu", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "num_thread": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Number of Threads", - "dynamic": false, - "info": "Number of threads to use during computation. (Default: detected for optimal performance)", - "list": false, - "list_add_label": "Add More", - "name": "num_thread", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "repeat_last_n": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Repeat Last N", - "dynamic": false, - "info": "How far back the model looks to prevent repetition. 
(Default: 64, 0 = disabled, -1 = num_ctx)", - "list": false, - "list_add_label": "Add More", - "name": "repeat_last_n", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "repeat_penalty": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Repeat Penalty", - "dynamic": false, - "info": "Penalty for repetitions in generated text. (Default: 1.1)", - "list": false, - "list_add_label": "Add More", - "name": "repeat_penalty", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "stop_tokens": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "Stop Tokens", - "dynamic": false, - "info": "Comma-separated list of tokens to signal the model to stop generating text.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "stop_tokens", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "stream": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Stream", - "dynamic": false, - "info": "Stream the response from the model. Streaming works only in Chat.", - "list": false, - "list_add_label": "Add More", - "name": "stream", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "system": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "System", - "dynamic": false, - "info": "System to use for generating text.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "system", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "system_message": { - "_input_type": "MultilineInput", - "advanced": false, - "copy_field": false, - "display_name": "System Message", - "dynamic": false, - "info": "System message to pass to the model.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "system_message", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "tags": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "Tags", - "dynamic": false, - "info": "Comma-separated list of tags to add to the run trace.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "tags", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "temperature": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Temperature", - "dynamic": false, - "info": "", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "temperature", - "placeholder": "", - "range_spec": 
{ - "max": 1, - "min": 0, - "step": 0.01, - "step_type": "float" + "tool_mode": false, + "last_updated": "2025-09-29T18:39:30.798Z", + "official": false }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.1 + "showNode": true, + "type": "OllamaModel", + "id": "OllamaModel-8Re0J", + "selected_output": "model_output" }, - "template": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "Template", - "dynamic": false, - "info": "Template to use for generating text.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "template", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" + "id": "OllamaModel-8Re0J", + "position": { + "x": 0, + "y": 0 }, - "tfs_z": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "TFS Z", - "dynamic": false, - "info": "Tail free sampling value. (Default: 1)", - "list": false, - "list_add_label": "Add More", - "name": "tfs_z", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "timeout": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Timeout", - "dynamic": false, - "info": "Timeout for the request stream.", - "list": false, - "list_add_label": "Add More", - "name": "timeout", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "tool_model_enabled": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Tool Model Enabled", - "dynamic": false, - "info": "Whether to enable tool calling in the model.", - "list": false, - "list_add_label": "Add More", - "name": "tool_model_enabled", - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "top_k": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Top K", - "dynamic": false, - "info": "Limits token selection to top K. (Default: 40)", - "list": false, - "list_add_label": "Add More", - "name": "top_k", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "top_p": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Top P", - "dynamic": false, - "info": "Works together with top-k. 
(Default: 0.9)", - "list": false, - "list_add_label": "Add More", - "name": "top_p", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "verbose": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Verbose", - "dynamic": false, - "info": "Whether to print out response text.", - "list": false, - "list_add_label": "Add More", - "name": "verbose", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - } - }, - "tool_mode": false - }, - "selected_output": "model_output", - "showNode": true, - "type": "OllamaModel" + "type": "genericNode" + } + ], + "viewport": { + "x": 1, + "y": 1, + "zoom": 1 + } }, - "dragging": false, - "id": "OllamaModel-eCsJx", - "measured": { - "height": 494, - "width": 320 - }, - "position": { - "x": 248.08287272472313, - "y": 216.98088326271431 - }, - "selected": false, - "type": "genericNode" + "description": "Generate text using Ollama Local LLMs.", + "name": "Ollama", + "id": "OllamaModel-8Re0J", + "is_component": true, + "last_tested_version": "1.6.0" } \ No newline at end of file diff --git a/flows/components/ollama_llm_text.json b/flows/components/ollama_llm_text.json index 846e8313..5d8076ed 100644 --- a/flows/components/ollama_llm_text.json +++ b/flows/components/ollama_llm_text.json @@ -1,687 +1,700 @@ { - "data": { - "id": "OllamaModel-XDGqZ", - "node": { - "base_classes": [ - "LanguageModel", - "Message" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Generate text using Ollama Local LLMs.", - "display_name": "Ollama", - "documentation": "", - "edited": false, - "field_order": [ - "base_url", - "model_name", - "temperature", - "format", - "metadata", - "mirostat", - "mirostat_eta", - "mirostat_tau", - "num_ctx", - "num_gpu", - "num_thread", - "repeat_last_n", - "repeat_penalty", - "tfs_z", - "timeout", - "top_k", - "top_p", - "verbose", - "tags", - "stop_tokens", - "system", - "tool_model_enabled", - "template", - "input_value", - "system_message", - "stream" - ], - "frozen": false, - "icon": "Ollama", - "last_updated": "2025-09-22T20:14:45.057Z", - "legacy": false, - "metadata": { - "code_hash": "af399d429d23", - "dependencies": { - "dependencies": [ + "data": { + "edges": [], + "nodes": [ + { + "data": { + "node": { + "template": { + "_type": "Component", + "base_url": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "base_url", + "value": "", + "display_name": "Base URL", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Endpoint of the Ollama API.", + "real_time_refresh": true, + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "code": { + "type": "code", + "required": true, + "placeholder": "", + "list": false, + "show": true, + "multiline": true, + "value": "import asyncio\nfrom typing import Any\nfrom urllib.parse import urljoin\n\nimport httpx\nfrom langchain_ollama import ChatOllama\n\nfrom lfx.base.models.model import LCModelComponent\nfrom lfx.base.models.ollama_constants import URL_LIST\nfrom lfx.field_typing import LanguageModel\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.io import BoolInput, 
DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SliderInput\nfrom lfx.log.logger import logger\n\nHTTP_STATUS_OK = 200\n\n\nclass ChatOllamaComponent(LCModelComponent):\n display_name = \"Ollama\"\n description = \"Generate text using Ollama Local LLMs.\"\n icon = \"Ollama\"\n name = \"OllamaModel\"\n\n # Define constants for JSON keys\n JSON_MODELS_KEY = \"models\"\n JSON_NAME_KEY = \"name\"\n JSON_CAPABILITIES_KEY = \"capabilities\"\n DESIRED_CAPABILITY = \"completion\"\n TOOL_CALLING_CAPABILITY = \"tools\"\n\n inputs = [\n MessageTextInput(\n name=\"base_url\",\n display_name=\"Base URL\",\n info=\"Endpoint of the Ollama API.\",\n value=\"\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n info=\"Refer to https://ollama.com/library for more models.\",\n refresh_button=True,\n real_time_refresh=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n MessageTextInput(\n name=\"format\", display_name=\"Format\", info=\"Specify the format of the output (e.g., json).\", advanced=True\n ),\n DictInput(name=\"metadata\", display_name=\"Metadata\", info=\"Metadata to add to the run trace.\", advanced=True),\n DropdownInput(\n name=\"mirostat\",\n display_name=\"Mirostat\",\n options=[\"Disabled\", \"Mirostat\", \"Mirostat 2.0\"],\n info=\"Enable/disable Mirostat sampling for controlling perplexity.\",\n value=\"Disabled\",\n advanced=True,\n real_time_refresh=True,\n ),\n FloatInput(\n name=\"mirostat_eta\",\n display_name=\"Mirostat Eta\",\n info=\"Learning rate for Mirostat algorithm. (Default: 0.1)\",\n advanced=True,\n ),\n FloatInput(\n name=\"mirostat_tau\",\n display_name=\"Mirostat Tau\",\n info=\"Controls the balance between coherence and diversity of the output. (Default: 5.0)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_ctx\",\n display_name=\"Context Window Size\",\n info=\"Size of the context window for generating tokens. (Default: 2048)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_gpu\",\n display_name=\"Number of GPUs\",\n info=\"Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_thread\",\n display_name=\"Number of Threads\",\n info=\"Number of threads to use during computation. (Default: detected for optimal performance)\",\n advanced=True,\n ),\n IntInput(\n name=\"repeat_last_n\",\n display_name=\"Repeat Last N\",\n info=\"How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\",\n advanced=True,\n ),\n FloatInput(\n name=\"repeat_penalty\",\n display_name=\"Repeat Penalty\",\n info=\"Penalty for repetitions in generated text. (Default: 1.1)\",\n advanced=True,\n ),\n FloatInput(name=\"tfs_z\", display_name=\"TFS Z\", info=\"Tail free sampling value. (Default: 1)\", advanced=True),\n IntInput(name=\"timeout\", display_name=\"Timeout\", info=\"Timeout for the request stream.\", advanced=True),\n IntInput(\n name=\"top_k\", display_name=\"Top K\", info=\"Limits token selection to top K. (Default: 40)\", advanced=True\n ),\n FloatInput(name=\"top_p\", display_name=\"Top P\", info=\"Works together with top-k. 
(Default: 0.9)\", advanced=True),\n BoolInput(name=\"verbose\", display_name=\"Verbose\", info=\"Whether to print out response text.\", advanced=True),\n MessageTextInput(\n name=\"tags\",\n display_name=\"Tags\",\n info=\"Comma-separated list of tags to add to the run trace.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"stop_tokens\",\n display_name=\"Stop Tokens\",\n info=\"Comma-separated list of tokens to signal the model to stop generating text.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"system\", display_name=\"System\", info=\"System to use for generating text.\", advanced=True\n ),\n BoolInput(\n name=\"tool_model_enabled\",\n display_name=\"Tool Model Enabled\",\n info=\"Whether to enable tool calling in the model.\",\n value=True,\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"template\", display_name=\"Template\", info=\"Template to use for generating text.\", advanced=True\n ),\n *LCModelComponent.get_base_inputs(),\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n # Mapping mirostat settings to their corresponding values\n mirostat_options = {\"Mirostat\": 1, \"Mirostat 2.0\": 2}\n\n # Default to 0 for 'Disabled'\n mirostat_value = mirostat_options.get(self.mirostat, 0)\n\n # Set mirostat_eta and mirostat_tau to None if mirostat is disabled\n if mirostat_value == 0:\n mirostat_eta = None\n mirostat_tau = None\n else:\n mirostat_eta = self.mirostat_eta\n mirostat_tau = self.mirostat_tau\n\n # Mapping system settings to their corresponding values\n llm_params = {\n \"base_url\": self.base_url,\n \"model\": self.model_name,\n \"mirostat\": mirostat_value,\n \"format\": self.format,\n \"metadata\": self.metadata,\n \"tags\": self.tags.split(\",\") if self.tags else None,\n \"mirostat_eta\": mirostat_eta,\n \"mirostat_tau\": mirostat_tau,\n \"num_ctx\": self.num_ctx or None,\n \"num_gpu\": self.num_gpu or None,\n \"num_thread\": self.num_thread or None,\n \"repeat_last_n\": self.repeat_last_n or None,\n \"repeat_penalty\": self.repeat_penalty or None,\n \"temperature\": self.temperature or None,\n \"stop\": self.stop_tokens.split(\",\") if self.stop_tokens else None,\n \"system\": self.system,\n \"tfs_z\": self.tfs_z or None,\n \"timeout\": self.timeout or None,\n \"top_k\": self.top_k or None,\n \"top_p\": self.top_p or None,\n \"verbose\": self.verbose,\n \"template\": self.template,\n }\n\n # Remove parameters with None values\n llm_params = {k: v for k, v in llm_params.items() if v is not None}\n\n try:\n output = ChatOllama(**llm_params)\n except Exception as e:\n msg = (\n \"Unable to connect to the Ollama API. 
\",\n \"Please verify the base URL, ensure the relevant Ollama model is pulled, and try again.\",\n )\n raise ValueError(msg) from e\n\n return output\n\n async def is_valid_ollama_url(self, url: str) -> bool:\n try:\n async with httpx.AsyncClient() as client:\n return (await client.get(urljoin(url, \"api/tags\"))).status_code == HTTP_STATUS_OK\n except httpx.RequestError:\n return False\n\n async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):\n if field_name == \"mirostat\":\n if field_value == \"Disabled\":\n build_config[\"mirostat_eta\"][\"advanced\"] = True\n build_config[\"mirostat_tau\"][\"advanced\"] = True\n build_config[\"mirostat_eta\"][\"value\"] = None\n build_config[\"mirostat_tau\"][\"value\"] = None\n\n else:\n build_config[\"mirostat_eta\"][\"advanced\"] = False\n build_config[\"mirostat_tau\"][\"advanced\"] = False\n\n if field_value == \"Mirostat 2.0\":\n build_config[\"mirostat_eta\"][\"value\"] = 0.2\n build_config[\"mirostat_tau\"][\"value\"] = 10\n else:\n build_config[\"mirostat_eta\"][\"value\"] = 0.1\n build_config[\"mirostat_tau\"][\"value\"] = 5\n\n if field_name in {\"base_url\", \"model_name\"}:\n if build_config[\"base_url\"].get(\"load_from_db\", False):\n base_url_value = await self.get_variables(build_config[\"base_url\"].get(\"value\", \"\"), \"base_url\")\n else:\n base_url_value = build_config[\"base_url\"].get(\"value\", \"\")\n\n if not await self.is_valid_ollama_url(base_url_value):\n # Check if any URL in the list is valid\n valid_url = \"\"\n check_urls = URL_LIST\n if self.base_url:\n check_urls = [self.base_url, *URL_LIST]\n for url in check_urls:\n if await self.is_valid_ollama_url(url):\n valid_url = url\n break\n if valid_url != \"\":\n build_config[\"base_url\"][\"value\"] = valid_url\n else:\n msg = \"No valid Ollama URL found.\"\n raise ValueError(msg)\n if field_name in {\"model_name\", \"base_url\", \"tool_model_enabled\"}:\n if await self.is_valid_ollama_url(self.base_url):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n self.base_url, tool_model_enabled=tool_model_enabled\n )\n elif await self.is_valid_ollama_url(build_config[\"base_url\"].get(\"value\", \"\")):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n build_config[\"base_url\"].get(\"value\", \"\"), tool_model_enabled=tool_model_enabled\n )\n else:\n build_config[\"model_name\"][\"options\"] = []\n if field_name == \"keep_alive_flag\":\n if field_value == \"Keep\":\n build_config[\"keep_alive\"][\"value\"] = \"-1\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n elif field_value == \"Immediately\":\n build_config[\"keep_alive\"][\"value\"] = \"0\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n else:\n build_config[\"keep_alive\"][\"advanced\"] = False\n\n return build_config\n\n async def get_models(self, base_url_value: str, *, tool_model_enabled: bool | None = None) -> list[str]:\n \"\"\"Fetches a list of models from the Ollama API that do not have the \"embedding\" capability.\n\n Args:\n base_url_value (str): The base URL of the Ollama API.\n tool_model_enabled (bool | None, optional): If True, filters the models further to include\n only those that support tool calling. 
Defaults to None.\n\n Returns:\n list[str]: A list of model names that do not have the \"embedding\" capability. If\n `tool_model_enabled` is True, only models supporting tool calling are included.\n\n Raises:\n ValueError: If there is an issue with the API request or response, or if the model\n names cannot be retrieved.\n \"\"\"\n try:\n # Normalize the base URL to avoid the repeated \"/\" at the end\n base_url = base_url_value.rstrip(\"/\") + \"/\"\n\n # Ollama REST API to return models\n tags_url = urljoin(base_url, \"api/tags\")\n\n # Ollama REST API to return model capabilities\n show_url = urljoin(base_url, \"api/show\")\n\n async with httpx.AsyncClient() as client:\n # Fetch available models\n tags_response = await client.get(tags_url)\n tags_response.raise_for_status()\n models = tags_response.json()\n if asyncio.iscoroutine(models):\n models = await models\n await logger.adebug(f\"Available models: {models}\")\n\n # Filter models that are NOT embedding models\n model_ids = []\n for model in models[self.JSON_MODELS_KEY]:\n model_name = model[self.JSON_NAME_KEY]\n await logger.adebug(f\"Checking model: {model_name}\")\n\n payload = {\"model\": model_name}\n show_response = await client.post(show_url, json=payload)\n show_response.raise_for_status()\n json_data = show_response.json()\n if asyncio.iscoroutine(json_data):\n json_data = await json_data\n capabilities = json_data.get(self.JSON_CAPABILITIES_KEY, [])\n await logger.adebug(f\"Model: {model_name}, Capabilities: {capabilities}\")\n\n if self.DESIRED_CAPABILITY in capabilities and (\n not tool_model_enabled or self.TOOL_CALLING_CAPABILITY in capabilities\n ):\n model_ids.append(model_name)\n\n except (httpx.RequestError, ValueError) as e:\n msg = \"Could not get model names from Ollama.\"\n raise ValueError(msg) from e\n\n return model_ids\n", + "fileTypes": [], + "file_path": "", + "password": false, + "name": "code", + "advanced": true, + "dynamic": true, + "info": "", + "load_from_db": false, + "title_case": false + }, + "format": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "format", + "value": "", + "display_name": "Format", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Specify the format of the output (e.g., json).", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "input_value": { + "trace_as_input": true, + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "input_value", + "value": "", + "display_name": "Input", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "", + "title_case": false, + "type": "str", + "_input_type": "MessageInput" + }, + "metadata": { + "tool_mode": false, + "trace_as_input": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "metadata", + "value": {}, + "display_name": "Metadata", + "advanced": true, + "dynamic": false, + "info": "Metadata to add to the run trace.", + "title_case": false, + "type": "dict", + "_input_type": "DictInput" + }, + "mirostat": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [ + "Disabled", + "Mirostat", + "Mirostat 2.0" + ], + "options_metadata": [], + 
"combobox": false, + "dialog_inputs": {}, + "toggle": false, + "required": false, + "placeholder": "", + "show": true, + "name": "mirostat", + "value": "Disabled", + "display_name": "Mirostat", + "advanced": true, + "dynamic": false, + "info": "Enable/disable Mirostat sampling for controlling perplexity.", + "real_time_refresh": true, + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + }, + "mirostat_eta": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "mirostat_eta", + "value": "", + "display_name": "Mirostat Eta", + "advanced": true, + "dynamic": false, + "info": "Learning rate for Mirostat algorithm. (Default: 0.1)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "mirostat_tau": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "mirostat_tau", + "value": "", + "display_name": "Mirostat Tau", + "advanced": true, + "dynamic": false, + "info": "Controls the balance between coherence and diversity of the output. (Default: 5.0)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "model_name": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [], + "options_metadata": [], + "combobox": false, + "dialog_inputs": {}, + "toggle": false, + "required": false, + "placeholder": "", + "show": true, + "name": "model_name", + "value": "", + "display_name": "Model Name", + "advanced": false, + "dynamic": false, + "info": "Refer to https://ollama.com/library for more models.", + "real_time_refresh": true, + "refresh_button": true, + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + }, + "num_ctx": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "num_ctx", + "value": "", + "display_name": "Context Window Size", + "advanced": true, + "dynamic": false, + "info": "Size of the context window for generating tokens. (Default: 2048)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "num_gpu": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "num_gpu", + "value": "", + "display_name": "Number of GPUs", + "advanced": true, + "dynamic": false, + "info": "Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "num_thread": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "num_thread", + "value": "", + "display_name": "Number of Threads", + "advanced": true, + "dynamic": false, + "info": "Number of threads to use during computation. 
(Default: detected for optimal performance)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "repeat_last_n": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "repeat_last_n", + "value": "", + "display_name": "Repeat Last N", + "advanced": true, + "dynamic": false, + "info": "How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "repeat_penalty": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "repeat_penalty", + "value": "", + "display_name": "Repeat Penalty", + "advanced": true, + "dynamic": false, + "info": "Penalty for repetitions in generated text. (Default: 1.1)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "stop_tokens": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "stop_tokens", + "value": "", + "display_name": "Stop Tokens", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Comma-separated list of tokens to signal the model to stop generating text.", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "stream": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "stream", + "value": false, + "display_name": "Stream", + "advanced": true, + "dynamic": false, + "info": "Stream the response from the model. 
Streaming works only in Chat.", + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + }, + "system": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "system", + "value": "", + "display_name": "System", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "System to use for generating text.", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "system_message": { + "tool_mode": false, + "trace_as_input": true, + "multiline": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "system_message", + "value": "", + "display_name": "System Message", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "System message to pass to the model.", + "title_case": false, + "copy_field": false, + "type": "str", + "_input_type": "MultilineInput" + }, + "tags": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "tags", + "value": "", + "display_name": "Tags", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Comma-separated list of tags to add to the run trace.", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "temperature": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": 0, + "max": 1, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "temperature", + "value": 0.1, + "display_name": "Temperature", + "advanced": true, + "dynamic": false, + "info": "", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "template": { + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "template", + "value": "", + "display_name": "Template", + "advanced": true, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "Template to use for generating text.", + "title_case": false, + "type": "str", + "_input_type": "MessageTextInput" + }, + "tfs_z": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "tfs_z", + "value": "", + "display_name": "TFS Z", + "advanced": true, + "dynamic": false, + "info": "Tail free sampling value. 
(Default: 1)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "timeout": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "timeout", + "value": "", + "display_name": "Timeout", + "advanced": true, + "dynamic": false, + "info": "Timeout for the request stream.", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "tool_model_enabled": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "tool_model_enabled", + "value": true, + "display_name": "Tool Model Enabled", + "advanced": false, + "dynamic": false, + "info": "Whether to enable tool calling in the model.", + "real_time_refresh": true, + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + }, + "top_k": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "top_k", + "value": "", + "display_name": "Top K", + "advanced": true, + "dynamic": false, + "info": "Limits token selection to top K. (Default: 40)", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "top_p": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "top_p", + "value": "", + "display_name": "Top P", + "advanced": true, + "dynamic": false, + "info": "Works together with top-k. (Default: 0.9)", + "title_case": false, + "type": "float", + "_input_type": "FloatInput" + }, + "verbose": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "verbose", + "value": false, + "display_name": "Verbose", + "advanced": true, + "dynamic": false, + "info": "Whether to print out response text.", + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + } + }, + "description": "Generate text using Ollama Local LLMs.", + "icon": "Ollama", + "base_classes": [ + "LanguageModel", + "Message" + ], + "display_name": "Ollama", + "documentation": "", + "minimized": false, + "custom_fields": {}, + "output_types": [], + "pinned": false, + "conditional_paths": [], + "frozen": false, + "outputs": [ { - "name": "httpx", - "version": "0.28.1" + "types": [ + "Message" + ], + "selected": "Message", + "name": "text_output", + "display_name": "Model Response", + "method": "text_response", + "value": "__UNDEFINED__", + "cache": true, + "required_inputs": null, + "allows_loop": false, + "group_outputs": false, + "options": null, + "tool_mode": true }, { - "name": "langchain_ollama", - "version": "0.2.1" - }, - { - "name": "langflow", - "version": null + "types": [ + "LanguageModel" + ], + "selected": "LanguageModel", + "name": "model_output", + "display_name": "Language Model", + "method": "build_model", + "value": "__UNDEFINED__", + "cache": true, + "required_inputs": null, + "allows_loop": false, + "group_outputs": false, + "options": null, + "tool_mode": true } ], - "total_dependencies": 3 - }, - "keywords": [ - "model", - "llm", - "language model", - "large language model" - ], - "module": "langflow.components.ollama.ollama.ChatOllamaComponent" - }, - "minimized": false, - "output_types": [], - "outputs": [ - { - 
"allows_loop": false, - "cache": true, - "display_name": "Model Response", - "group_outputs": false, - "method": "text_response", - "name": "text_output", - "options": null, - "required_inputs": null, - "selected": "Message", - "tool_mode": true, - "types": [ - "Message" + "field_order": [ + "base_url", + "model_name", + "temperature", + "format", + "metadata", + "mirostat", + "mirostat_eta", + "mirostat_tau", + "num_ctx", + "num_gpu", + "num_thread", + "repeat_last_n", + "repeat_penalty", + "tfs_z", + "timeout", + "top_k", + "top_p", + "verbose", + "tags", + "stop_tokens", + "system", + "tool_model_enabled", + "template", + "input_value", + "system_message", + "stream" ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Language Model", - "group_outputs": false, - "method": "build_model", - "name": "model_output", - "options": null, - "required_inputs": null, - "tool_mode": true, - "types": [ - "LanguageModel" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "base_url": { - "_input_type": "MessageTextInput", - "advanced": false, - "display_name": "Base URL", - "dynamic": false, - "info": "Endpoint of the Ollama API.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": true, - "name": "base_url", - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "OLLAMA_BASE_URL" - }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "import asyncio\nfrom typing import Any\nfrom urllib.parse import urljoin\n\nimport httpx\nfrom langchain_ollama import ChatOllama\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.base.models.ollama_constants import URL_LIST\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.io import BoolInput, DictInput, DropdownInput, FloatInput, IntInput, MessageTextInput, SliderInput\nfrom langflow.logging import logger\n\nHTTP_STATUS_OK = 200\n\n\nclass ChatOllamaComponent(LCModelComponent):\n display_name = \"Ollama\"\n description = \"Generate text using Ollama Local LLMs.\"\n icon = \"Ollama\"\n name = \"OllamaModel\"\n\n # Define constants for JSON keys\n JSON_MODELS_KEY = \"models\"\n JSON_NAME_KEY = \"name\"\n JSON_CAPABILITIES_KEY = \"capabilities\"\n DESIRED_CAPABILITY = \"completion\"\n TOOL_CALLING_CAPABILITY = \"tools\"\n\n inputs = [\n MessageTextInput(\n name=\"base_url\",\n display_name=\"Base URL\",\n info=\"Endpoint of the Ollama API.\",\n value=\"\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n info=\"Refer to https://ollama.com/library for more models.\",\n refresh_button=True,\n real_time_refresh=True,\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n MessageTextInput(\n name=\"format\", display_name=\"Format\", info=\"Specify the format of the output (e.g., json).\", advanced=True\n ),\n DictInput(name=\"metadata\", 
display_name=\"Metadata\", info=\"Metadata to add to the run trace.\", advanced=True),\n DropdownInput(\n name=\"mirostat\",\n display_name=\"Mirostat\",\n options=[\"Disabled\", \"Mirostat\", \"Mirostat 2.0\"],\n info=\"Enable/disable Mirostat sampling for controlling perplexity.\",\n value=\"Disabled\",\n advanced=True,\n real_time_refresh=True,\n ),\n FloatInput(\n name=\"mirostat_eta\",\n display_name=\"Mirostat Eta\",\n info=\"Learning rate for Mirostat algorithm. (Default: 0.1)\",\n advanced=True,\n ),\n FloatInput(\n name=\"mirostat_tau\",\n display_name=\"Mirostat Tau\",\n info=\"Controls the balance between coherence and diversity of the output. (Default: 5.0)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_ctx\",\n display_name=\"Context Window Size\",\n info=\"Size of the context window for generating tokens. (Default: 2048)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_gpu\",\n display_name=\"Number of GPUs\",\n info=\"Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)\",\n advanced=True,\n ),\n IntInput(\n name=\"num_thread\",\n display_name=\"Number of Threads\",\n info=\"Number of threads to use during computation. (Default: detected for optimal performance)\",\n advanced=True,\n ),\n IntInput(\n name=\"repeat_last_n\",\n display_name=\"Repeat Last N\",\n info=\"How far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\",\n advanced=True,\n ),\n FloatInput(\n name=\"repeat_penalty\",\n display_name=\"Repeat Penalty\",\n info=\"Penalty for repetitions in generated text. (Default: 1.1)\",\n advanced=True,\n ),\n FloatInput(name=\"tfs_z\", display_name=\"TFS Z\", info=\"Tail free sampling value. (Default: 1)\", advanced=True),\n IntInput(name=\"timeout\", display_name=\"Timeout\", info=\"Timeout for the request stream.\", advanced=True),\n IntInput(\n name=\"top_k\", display_name=\"Top K\", info=\"Limits token selection to top K. (Default: 40)\", advanced=True\n ),\n FloatInput(name=\"top_p\", display_name=\"Top P\", info=\"Works together with top-k. 
(Default: 0.9)\", advanced=True),\n BoolInput(name=\"verbose\", display_name=\"Verbose\", info=\"Whether to print out response text.\", advanced=True),\n MessageTextInput(\n name=\"tags\",\n display_name=\"Tags\",\n info=\"Comma-separated list of tags to add to the run trace.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"stop_tokens\",\n display_name=\"Stop Tokens\",\n info=\"Comma-separated list of tokens to signal the model to stop generating text.\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"system\", display_name=\"System\", info=\"System to use for generating text.\", advanced=True\n ),\n BoolInput(\n name=\"tool_model_enabled\",\n display_name=\"Tool Model Enabled\",\n info=\"Whether to enable tool calling in the model.\",\n value=True,\n real_time_refresh=True,\n ),\n MessageTextInput(\n name=\"template\", display_name=\"Template\", info=\"Template to use for generating text.\", advanced=True\n ),\n *LCModelComponent._base_inputs,\n ]\n\n def build_model(self) -> LanguageModel: # type: ignore[type-var]\n # Mapping mirostat settings to their corresponding values\n mirostat_options = {\"Mirostat\": 1, \"Mirostat 2.0\": 2}\n\n # Default to 0 for 'Disabled'\n mirostat_value = mirostat_options.get(self.mirostat, 0)\n\n # Set mirostat_eta and mirostat_tau to None if mirostat is disabled\n if mirostat_value == 0:\n mirostat_eta = None\n mirostat_tau = None\n else:\n mirostat_eta = self.mirostat_eta\n mirostat_tau = self.mirostat_tau\n\n # Mapping system settings to their corresponding values\n llm_params = {\n \"base_url\": self.base_url,\n \"model\": self.model_name,\n \"mirostat\": mirostat_value,\n \"format\": self.format,\n \"metadata\": self.metadata,\n \"tags\": self.tags.split(\",\") if self.tags else None,\n \"mirostat_eta\": mirostat_eta,\n \"mirostat_tau\": mirostat_tau,\n \"num_ctx\": self.num_ctx or None,\n \"num_gpu\": self.num_gpu or None,\n \"num_thread\": self.num_thread or None,\n \"repeat_last_n\": self.repeat_last_n or None,\n \"repeat_penalty\": self.repeat_penalty or None,\n \"temperature\": self.temperature or None,\n \"stop\": self.stop_tokens.split(\",\") if self.stop_tokens else None,\n \"system\": self.system,\n \"tfs_z\": self.tfs_z or None,\n \"timeout\": self.timeout or None,\n \"top_k\": self.top_k or None,\n \"top_p\": self.top_p or None,\n \"verbose\": self.verbose,\n \"template\": self.template,\n }\n\n # Remove parameters with None values\n llm_params = {k: v for k, v in llm_params.items() if v is not None}\n\n try:\n output = ChatOllama(**llm_params)\n except Exception as e:\n msg = (\n \"Unable to connect to the Ollama API. 
\",\n \"Please verify the base URL, ensure the relevant Ollama model is pulled, and try again.\",\n )\n raise ValueError(msg) from e\n\n return output\n\n async def is_valid_ollama_url(self, url: str) -> bool:\n try:\n async with httpx.AsyncClient() as client:\n return (await client.get(urljoin(url, \"api/tags\"))).status_code == HTTP_STATUS_OK\n except httpx.RequestError:\n return False\n\n async def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None):\n if field_name == \"mirostat\":\n if field_value == \"Disabled\":\n build_config[\"mirostat_eta\"][\"advanced\"] = True\n build_config[\"mirostat_tau\"][\"advanced\"] = True\n build_config[\"mirostat_eta\"][\"value\"] = None\n build_config[\"mirostat_tau\"][\"value\"] = None\n\n else:\n build_config[\"mirostat_eta\"][\"advanced\"] = False\n build_config[\"mirostat_tau\"][\"advanced\"] = False\n\n if field_value == \"Mirostat 2.0\":\n build_config[\"mirostat_eta\"][\"value\"] = 0.2\n build_config[\"mirostat_tau\"][\"value\"] = 10\n else:\n build_config[\"mirostat_eta\"][\"value\"] = 0.1\n build_config[\"mirostat_tau\"][\"value\"] = 5\n\n if field_name in {\"base_url\", \"model_name\"}:\n if build_config[\"base_url\"].get(\"load_from_db\", False):\n base_url_value = await self.get_variables(build_config[\"base_url\"].get(\"value\", \"\"), \"base_url\")\n else:\n base_url_value = build_config[\"base_url\"].get(\"value\", \"\")\n\n if not await self.is_valid_ollama_url(base_url_value):\n # Check if any URL in the list is valid\n valid_url = \"\"\n check_urls = URL_LIST\n if self.base_url:\n check_urls = [self.base_url, *URL_LIST]\n for url in check_urls:\n if await self.is_valid_ollama_url(url):\n valid_url = url\n break\n if valid_url != \"\":\n build_config[\"base_url\"][\"value\"] = valid_url\n else:\n msg = \"No valid Ollama URL found.\"\n raise ValueError(msg)\n if field_name in {\"model_name\", \"base_url\", \"tool_model_enabled\"}:\n if await self.is_valid_ollama_url(self.base_url):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n self.base_url, tool_model_enabled=tool_model_enabled\n )\n elif await self.is_valid_ollama_url(build_config[\"base_url\"].get(\"value\", \"\")):\n tool_model_enabled = build_config[\"tool_model_enabled\"].get(\"value\", False) or self.tool_model_enabled\n build_config[\"model_name\"][\"options\"] = await self.get_models(\n build_config[\"base_url\"].get(\"value\", \"\"), tool_model_enabled=tool_model_enabled\n )\n else:\n build_config[\"model_name\"][\"options\"] = []\n if field_name == \"keep_alive_flag\":\n if field_value == \"Keep\":\n build_config[\"keep_alive\"][\"value\"] = \"-1\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n elif field_value == \"Immediately\":\n build_config[\"keep_alive\"][\"value\"] = \"0\"\n build_config[\"keep_alive\"][\"advanced\"] = True\n else:\n build_config[\"keep_alive\"][\"advanced\"] = False\n\n return build_config\n\n async def get_models(self, base_url_value: str, *, tool_model_enabled: bool | None = None) -> list[str]:\n \"\"\"Fetches a list of models from the Ollama API that do not have the \"embedding\" capability.\n\n Args:\n base_url_value (str): The base URL of the Ollama API.\n tool_model_enabled (bool | None, optional): If True, filters the models further to include\n only those that support tool calling. 
Defaults to None.\n\n Returns:\n list[str]: A list of model names that do not have the \"embedding\" capability. If\n `tool_model_enabled` is True, only models supporting tool calling are included.\n\n Raises:\n ValueError: If there is an issue with the API request or response, or if the model\n names cannot be retrieved.\n \"\"\"\n try:\n # Normalize the base URL to avoid the repeated \"/\" at the end\n base_url = base_url_value.rstrip(\"/\") + \"/\"\n\n # Ollama REST API to return models\n tags_url = urljoin(base_url, \"api/tags\")\n\n # Ollama REST API to return model capabilities\n show_url = urljoin(base_url, \"api/show\")\n\n async with httpx.AsyncClient() as client:\n # Fetch available models\n tags_response = await client.get(tags_url)\n tags_response.raise_for_status()\n models = tags_response.json()\n if asyncio.iscoroutine(models):\n models = await models\n await logger.adebug(f\"Available models: {models}\")\n\n # Filter models that are NOT embedding models\n model_ids = []\n for model in models[self.JSON_MODELS_KEY]:\n model_name = model[self.JSON_NAME_KEY]\n await logger.adebug(f\"Checking model: {model_name}\")\n\n payload = {\"model\": model_name}\n show_response = await client.post(show_url, json=payload)\n show_response.raise_for_status()\n json_data = show_response.json()\n if asyncio.iscoroutine(json_data):\n json_data = await json_data\n capabilities = json_data.get(self.JSON_CAPABILITIES_KEY, [])\n await logger.adebug(f\"Model: {model_name}, Capabilities: {capabilities}\")\n\n if self.DESIRED_CAPABILITY in capabilities and (\n not tool_model_enabled or self.TOOL_CALLING_CAPABILITY in capabilities\n ):\n model_ids.append(model_name)\n\n except (httpx.RequestError, ValueError) as e:\n msg = \"Could not get model names from Ollama.\"\n raise ValueError(msg) from e\n\n return model_ids\n" - }, - "format": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "Format", - "dynamic": false, - "info": "Specify the format of the output (e.g., json).", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "format", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "input_value": { - "_input_type": "MessageInput", - "advanced": false, - "display_name": "Input", - "dynamic": false, - "info": "", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "input_value", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "metadata": { - "_input_type": "DictInput", - "advanced": true, - "display_name": "Metadata", - "dynamic": false, - "info": "Metadata to add to the run trace.", - "list": false, - "list_add_label": "Add More", - "name": "metadata", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "type": "dict", - "value": {} - }, - "mirostat": { - "_input_type": "DropdownInput", - "advanced": true, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Mirostat", - "dynamic": false, - "info": "Enable/disable Mirostat sampling for controlling perplexity.", - "name": "mirostat", - "options": [ - "Disabled", - "Mirostat", - "Mirostat 2.0" - ], - 
"options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "Disabled" - }, - "mirostat_eta": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Mirostat Eta", - "dynamic": false, - "info": "Learning rate for Mirostat algorithm. (Default: 0.1)", - "list": false, - "list_add_label": "Add More", - "name": "mirostat_eta", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "mirostat_tau": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Mirostat Tau", - "dynamic": false, - "info": "Controls the balance between coherence and diversity of the output. (Default: 5.0)", - "list": false, - "list_add_label": "Add More", - "name": "mirostat_tau", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "model_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Name", - "dynamic": false, - "info": "Refer to https://ollama.com/library for more models.", - "name": "model_name", - "options": [ - "qwen3:4b" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "refresh_button": true, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "qwen3:4b" - }, - "num_ctx": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Context Window Size", - "dynamic": false, - "info": "Size of the context window for generating tokens. (Default: 2048)", - "list": false, - "list_add_label": "Add More", - "name": "num_ctx", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "num_gpu": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Number of GPUs", - "dynamic": false, - "info": "Number of GPUs to use for computation. (Default: 1 on macOS, 0 to disable)", - "list": false, - "list_add_label": "Add More", - "name": "num_gpu", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "num_thread": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Number of Threads", - "dynamic": false, - "info": "Number of threads to use during computation. (Default: detected for optimal performance)", - "list": false, - "list_add_label": "Add More", - "name": "num_thread", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "repeat_last_n": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Repeat Last N", - "dynamic": false, - "info": "How far back the model looks to prevent repetition. 
(Default: 64, 0 = disabled, -1 = num_ctx)", - "list": false, - "list_add_label": "Add More", - "name": "repeat_last_n", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "repeat_penalty": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Repeat Penalty", - "dynamic": false, - "info": "Penalty for repetitions in generated text. (Default: 1.1)", - "list": false, - "list_add_label": "Add More", - "name": "repeat_penalty", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "stop_tokens": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "Stop Tokens", - "dynamic": false, - "info": "Comma-separated list of tokens to signal the model to stop generating text.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "stop_tokens", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "stream": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Stream", - "dynamic": false, - "info": "Stream the response from the model. Streaming works only in Chat.", - "list": false, - "list_add_label": "Add More", - "name": "stream", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "system": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "System", - "dynamic": false, - "info": "System to use for generating text.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "system", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "system_message": { - "_input_type": "MultilineInput", - "advanced": false, - "copy_field": false, - "display_name": "System Message", - "dynamic": false, - "info": "System message to pass to the model.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "system_message", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "tags": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "Tags", - "dynamic": false, - "info": "Comma-separated list of tags to add to the run trace.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "tags", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "temperature": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Temperature", - "dynamic": false, - "info": "", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "temperature", - "placeholder": "", - "range_spec": 
{ - "max": 1, - "min": 0, - "step": 0.01, - "step_type": "float" + "beta": false, + "legacy": false, + "edited": false, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ], + "module": "lfx.components.ollama.ollama.ChatOllamaComponent", + "code_hash": "54de3b5da388", + "dependencies": { + "total_dependencies": 3, + "dependencies": [ + { + "name": "httpx", + "version": "0.28.1" + }, + { + "name": "langchain_ollama", + "version": "0.2.1" + }, + { + "name": "lfx", + "version": null + } + ] + } }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, "tool_mode": false, - "type": "slider", - "value": 0.1 + "last_updated": "2025-09-29T18:39:30.798Z", + "official": false }, - "template": { - "_input_type": "MessageTextInput", - "advanced": true, - "display_name": "Template", - "dynamic": false, - "info": "Template to use for generating text.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "template", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "tfs_z": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "TFS Z", - "dynamic": false, - "info": "Tail free sampling value. (Default: 1)", - "list": false, - "list_add_label": "Add More", - "name": "tfs_z", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "timeout": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Timeout", - "dynamic": false, - "info": "Timeout for the request stream.", - "list": false, - "list_add_label": "Add More", - "name": "timeout", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "tool_model_enabled": { - "_input_type": "BoolInput", - "advanced": false, - "display_name": "Tool Model Enabled", - "dynamic": false, - "info": "Whether to enable tool calling in the model.", - "list": false, - "list_add_label": "Add More", - "name": "tool_model_enabled", - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "top_k": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Top K", - "dynamic": false, - "info": "Limits token selection to top K. (Default: 40)", - "list": false, - "list_add_label": "Add More", - "name": "top_k", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": "" - }, - "top_p": { - "_input_type": "FloatInput", - "advanced": true, - "display_name": "Top P", - "dynamic": false, - "info": "Works together with top-k. 
(Default: 0.9)", - "list": false, - "list_add_label": "Add More", - "name": "top_p", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "float", - "value": "" - }, - "verbose": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Verbose", - "dynamic": false, - "info": "Whether to print out response text.", - "list": false, - "list_add_label": "Add More", - "name": "verbose", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - } + "showNode": true, + "type": "OllamaModel", + "id": "OllamaModel-8Re0J", + "selected_output": "text_output" }, - "tool_mode": false - }, - "selected_output": "text_output", - "showNode": true, - "type": "OllamaModel" - }, - "dragging": false, - "id": "OllamaModel-XDGqZ", - "measured": { - "height": 494, - "width": 320 - }, - "position": { - "x": 248.08287272472313, - "y": 216.98088326271431 - }, - "selected": false, - "type": "genericNode" - } \ No newline at end of file + "id": "OllamaModel-8Re0J", + "position": { + "x": 0, + "y": 0 + }, + "type": "genericNode" + } + ], + "viewport": { + "x": 1, + "y": 1, + "zoom": 1 + } + }, + "description": "Generate text using Ollama Local LLMs.", + "name": "Ollama", + "id": "OllamaModel-8Re0J", + "is_component": true, + "last_tested_version": "1.6.0" +} \ No newline at end of file diff --git a/flows/components/watsonx_embedding.json b/flows/components/watsonx_embedding.json index 850cfb07..31376819 100644 --- a/flows/components/watsonx_embedding.json +++ b/flows/components/watsonx_embedding.json @@ -1,246 +1 @@ -{ - "data": { - "id": "WatsonxEmbeddingsComponent-pJfXI", - "node": { - "base_classes": [ - "Embeddings" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Generate embeddings using IBM watsonx.ai models.", - "display_name": "IBM watsonx.ai Embeddings", - "documentation": "", - "edited": false, - "field_order": [ - "url", - "project_id", - "api_key", - "model_name", - "truncate_input_tokens", - "input_text" - ], - "frozen": false, - "icon": "WatsonxAI", - "last_updated": "2025-09-22T20:11:38.181Z", - "legacy": false, - "metadata": { - "code_hash": "b6c6d50cc7ed", - "dependencies": { - "dependencies": [ - { - "name": "requests", - "version": "2.32.5" - }, - { - "name": "ibm_watsonx_ai", - "version": "1.3.34" - }, - { - "name": "langchain_ibm", - "version": "0.3.16" - }, - { - "name": "pydantic", - "version": "2.10.6" - }, - { - "name": "langflow", - "version": null - } - ], - "total_dependencies": 5 - }, - "module": "langflow.components.ibm.watsonx_embeddings.WatsonxEmbeddingsComponent" - }, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Embedding Model", - "group_outputs": false, - "method": "build_embeddings", - "name": "embeddings", - "options": null, - "required_inputs": null, - "selected": "Embeddings", - "tool_mode": true, - "types": [ - "Embeddings" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "api_key": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "API Key", - "dynamic": false, - "info": "The API Key to use for the model.", - "input_types": [], - "load_from_db": true, - "name": "api_key", - "password": true, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - 
"type": "str", - "value": "WATSONX_API_KEY" - }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "from typing import Any\n\nimport requests\nfrom ibm_watsonx_ai import APIClient, Credentials\nfrom ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames\nfrom langchain_ibm import WatsonxEmbeddings\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.base.embeddings.model import LCEmbeddingsModel\nfrom langflow.field_typing import Embeddings\nfrom langflow.io import BoolInput, DropdownInput, IntInput, SecretStrInput, StrInput\nfrom langflow.logging.logger import logger\nfrom langflow.schema.dotdict import dotdict\n\n\nclass WatsonxEmbeddingsComponent(LCEmbeddingsModel):\n display_name = \"IBM watsonx.ai Embeddings\"\n description = \"Generate embeddings using IBM watsonx.ai models.\"\n icon = \"WatsonxAI\"\n name = \"WatsonxEmbeddingsComponent\"\n\n # models present in all the regions\n _default_models = [\n \"sentence-transformers/all-minilm-l12-v2\",\n \"ibm/slate-125m-english-rtrvr-v2\",\n \"ibm/slate-30m-english-rtrvr-v2\",\n \"intfloat/multilingual-e5-large\",\n ]\n\n inputs = [\n DropdownInput(\n name=\"url\",\n display_name=\"watsonx API Endpoint\",\n info=\"The base URL of the API.\",\n value=None,\n options=[\n \"https://us-south.ml.cloud.ibm.com\",\n \"https://eu-de.ml.cloud.ibm.com\",\n \"https://eu-gb.ml.cloud.ibm.com\",\n \"https://au-syd.ml.cloud.ibm.com\",\n \"https://jp-tok.ml.cloud.ibm.com\",\n \"https://ca-tor.ml.cloud.ibm.com\",\n ],\n real_time_refresh=True,\n ),\n StrInput(\n name=\"project_id\",\n display_name=\"watsonx project id\",\n info=\"The project ID or deployment space ID that is associated with the foundation model.\",\n required=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"The API Key to use for the model.\",\n required=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n value=None,\n dynamic=True,\n required=True,\n ),\n IntInput(\n name=\"truncate_input_tokens\",\n display_name=\"Truncate Input Tokens\",\n advanced=True,\n value=200,\n ),\n BoolInput(\n name=\"input_text\",\n display_name=\"Include the original text in the output\",\n value=True,\n advanced=True,\n ),\n ]\n\n @staticmethod\n def fetch_models(base_url: str) -> list[str]:\n \"\"\"Fetch available models from the watsonx.ai API.\"\"\"\n try:\n endpoint = f\"{base_url}/ml/v1/foundation_model_specs\"\n params = {\n \"version\": \"2024-09-16\",\n \"filters\": \"function_embedding,!lifecycle_withdrawn:and\",\n }\n response = requests.get(endpoint, params=params, timeout=10)\n response.raise_for_status()\n data = response.json()\n models = [model[\"model_id\"] for model in data.get(\"resources\", [])]\n return sorted(models)\n except Exception: # noqa: BLE001\n logger.exception(\"Error fetching models\")\n return WatsonxEmbeddingsComponent._default_models\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):\n \"\"\"Update model options when URL or API key changes.\"\"\"\n logger.debug(\n \"Updating build config. 
Field name: %s, Field value: %s\",\n field_name,\n field_value,\n )\n\n if field_name == \"url\" and field_value:\n try:\n models = self.fetch_models(base_url=build_config.url.value)\n build_config.model_name.options = models\n if build_config.model_name.value:\n build_config.model_name.value = models[0]\n info_message = f\"Updated model options: {len(models)} models found in {build_config.url.value}\"\n logger.info(info_message)\n except Exception: # noqa: BLE001\n logger.exception(\"Error updating model options.\")\n\n def build_embeddings(self) -> Embeddings:\n credentials = Credentials(\n api_key=SecretStr(self.api_key).get_secret_value(),\n url=self.url,\n )\n\n api_client = APIClient(credentials)\n\n params = {\n EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: self.truncate_input_tokens,\n EmbedTextParamsMetaNames.RETURN_OPTIONS: {\"input_text\": self.input_text},\n }\n\n return WatsonxEmbeddings(\n model_id=self.model_name,\n params=params,\n watsonx_client=api_client,\n project_id=self.project_id,\n )\n" - }, - "input_text": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Include the original text in the output", - "dynamic": false, - "info": "", - "list": false, - "list_add_label": "Add More", - "name": "input_text", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "model_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Name", - "dynamic": true, - "info": "", - "name": "model_name", - "options": [ - "ibm/granite-embedding-107m-multilingual", - "ibm/granite-embedding-278m-multilingual", - "ibm/slate-125m-english-rtrvr", - "ibm/slate-125m-english-rtrvr-v2", - "ibm/slate-30m-english-rtrvr", - "ibm/slate-30m-english-rtrvr-v2", - "intfloat/multilingual-e5-large", - "sentence-transformers/all-minilm-l6-v2" - ], - "options_metadata": [], - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "ibm/granite-embedding-107m-multilingual" - }, - "project_id": { - "_input_type": "StrInput", - "advanced": false, - "display_name": "watsonx project id", - "dynamic": false, - "info": "The project ID or deployment space ID that is associated with the foundation model.", - "list": false, - "list_add_label": "Add More", - "load_from_db": true, - "name": "project_id", - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "WATSONX_PROJECT_ID" - }, - "truncate_input_tokens": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Truncate Input Tokens", - "dynamic": false, - "info": "", - "list": false, - "list_add_label": "Add More", - "name": "truncate_input_tokens", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 200 - }, - "url": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "watsonx API Endpoint", - "dynamic": false, - "info": "The base URL of the API.", - "name": "url", - "options": [ - "https://us-south.ml.cloud.ibm.com", - "https://eu-de.ml.cloud.ibm.com", - "https://eu-gb.ml.cloud.ibm.com", - "https://au-syd.ml.cloud.ibm.com", - 
"https://jp-tok.ml.cloud.ibm.com", - "https://ca-tor.ml.cloud.ibm.com" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "https://us-south.ml.cloud.ibm.com" - } - }, - "tool_mode": false - }, - "showNode": true, - "type": "WatsonxEmbeddingsComponent" - }, - "dragging": false, - "id": "WatsonxEmbeddingsComponent-pJfXI", - "measured": { - "height": 467, - "width": 320 - }, - "position": { - "x": 364.4406919374723, - "y": 282.29319267029086 - }, - "selected": false, - "type": "genericNode" -} \ No newline at end of file +{"data":{"edges":[],"nodes":[{"data":{"node":{"template":{"_type":"Component","api_key":{"load_from_db":false,"required":true,"placeholder":"","show":true,"name":"api_key","value":"","display_name":"Watsonx API Key","advanced":false,"input_types":[],"dynamic":false,"info":"The API Key to use for the model.","title_case":false,"password":true,"type":"str","_input_type":"SecretStrInput"},"code":{"type":"code","required":true,"placeholder":"","list":false,"show":true,"multiline":true,"value":"from typing import Any\n\nimport requests\nfrom ibm_watsonx_ai import APIClient, Credentials\nfrom ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames\nfrom langchain_ibm import WatsonxEmbeddings\nfrom pydantic.v1 import SecretStr\n\nfrom lfx.base.embeddings.model import LCEmbeddingsModel\nfrom lfx.field_typing import Embeddings\nfrom lfx.io import BoolInput, DropdownInput, IntInput, SecretStrInput, StrInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dotdict import dotdict\n\n\nclass WatsonxEmbeddingsComponent(LCEmbeddingsModel):\n display_name = \"IBM watsonx.ai Embeddings\"\n description = \"Generate embeddings using IBM watsonx.ai models.\"\n icon = \"WatsonxAI\"\n name = \"WatsonxEmbeddingsComponent\"\n\n # models present in all the regions\n _default_models = [\n \"sentence-transformers/all-minilm-l12-v2\",\n \"ibm/slate-125m-english-rtrvr-v2\",\n \"ibm/slate-30m-english-rtrvr-v2\",\n \"intfloat/multilingual-e5-large\",\n ]\n\n inputs = [\n DropdownInput(\n name=\"url\",\n display_name=\"watsonx API Endpoint\",\n info=\"The base URL of the API.\",\n value=None,\n options=[\n \"https://us-south.ml.cloud.ibm.com\",\n \"https://eu-de.ml.cloud.ibm.com\",\n \"https://eu-gb.ml.cloud.ibm.com\",\n \"https://au-syd.ml.cloud.ibm.com\",\n \"https://jp-tok.ml.cloud.ibm.com\",\n \"https://ca-tor.ml.cloud.ibm.com\",\n ],\n real_time_refresh=True,\n ),\n StrInput(\n name=\"project_id\",\n display_name=\"watsonx project id\",\n info=\"The project ID or deployment space ID that is associated with the foundation model.\",\n required=True,\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Watsonx API Key\",\n info=\"The API Key to use for the model.\",\n required=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n value=None,\n dynamic=True,\n required=True,\n ),\n IntInput(\n name=\"truncate_input_tokens\",\n display_name=\"Truncate Input Tokens\",\n advanced=True,\n value=200,\n ),\n BoolInput(\n name=\"input_text\",\n display_name=\"Include the original text in the output\",\n value=True,\n advanced=True,\n ),\n ]\n\n @staticmethod\n def fetch_models(base_url: str) -> list[str]:\n \"\"\"Fetch available models from the watsonx.ai API.\"\"\"\n try:\n endpoint = f\"{base_url}/ml/v1/foundation_model_specs\"\n params = {\n \"version\": 
\"2024-09-16\",\n \"filters\": \"function_embedding,!lifecycle_withdrawn:and\",\n }\n response = requests.get(endpoint, params=params, timeout=10)\n response.raise_for_status()\n data = response.json()\n models = [model[\"model_id\"] for model in data.get(\"resources\", [])]\n return sorted(models)\n except Exception: # noqa: BLE001\n logger.exception(\"Error fetching models\")\n return WatsonxEmbeddingsComponent._default_models\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):\n \"\"\"Update model options when URL or API key changes.\"\"\"\n logger.debug(\n \"Updating build config. Field name: %s, Field value: %s\",\n field_name,\n field_value,\n )\n\n if field_name == \"url\" and field_value:\n try:\n models = self.fetch_models(base_url=build_config.url.value)\n build_config.model_name.options = models\n if build_config.model_name.value:\n build_config.model_name.value = models[0]\n info_message = f\"Updated model options: {len(models)} models found in {build_config.url.value}\"\n logger.info(info_message)\n except Exception: # noqa: BLE001\n logger.exception(\"Error updating model options.\")\n\n def build_embeddings(self) -> Embeddings:\n credentials = Credentials(\n api_key=SecretStr(self.api_key).get_secret_value(),\n url=self.url,\n )\n\n api_client = APIClient(credentials)\n\n params = {\n EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: self.truncate_input_tokens,\n EmbedTextParamsMetaNames.RETURN_OPTIONS: {\"input_text\": self.input_text},\n }\n\n return WatsonxEmbeddings(\n model_id=self.model_name,\n params=params,\n watsonx_client=api_client,\n project_id=self.project_id,\n )\n","fileTypes":[],"file_path":"","password":false,"name":"code","advanced":true,"dynamic":true,"info":"","load_from_db":false,"title_case":false},"input_text":{"tool_mode":false,"trace_as_metadata":true,"list":false,"list_add_label":"Add More","required":false,"placeholder":"","show":true,"name":"input_text","value":true,"display_name":"Include the original text in the output","advanced":true,"dynamic":false,"info":"","title_case":false,"type":"bool","_input_type":"BoolInput"},"model_name":{"tool_mode":false,"trace_as_metadata":true,"options":[],"options_metadata":[],"combobox":false,"dialog_inputs":{},"toggle":false,"required":true,"placeholder":"","show":true,"name":"model_name","display_name":"Model Name","advanced":false,"dynamic":true,"info":"","title_case":false,"external_options":{},"type":"str","_input_type":"DropdownInput"},"project_id":{"tool_mode":false,"trace_as_metadata":true,"load_from_db":false,"list":false,"list_add_label":"Add More","required":true,"placeholder":"","show":true,"name":"project_id","value":"","display_name":"watsonx project id","advanced":false,"dynamic":false,"info":"The project ID or deployment space ID that is associated with the foundation model.","title_case":false,"type":"str","_input_type":"StrInput"},"truncate_input_tokens":{"tool_mode":false,"trace_as_metadata":true,"list":false,"list_add_label":"Add More","required":false,"placeholder":"","show":true,"name":"truncate_input_tokens","value":200,"display_name":"Truncate Input 
Tokens","advanced":true,"dynamic":false,"info":"","title_case":false,"type":"int","_input_type":"IntInput"},"url":{"tool_mode":false,"trace_as_metadata":true,"options":["https://us-south.ml.cloud.ibm.com","https://eu-de.ml.cloud.ibm.com","https://eu-gb.ml.cloud.ibm.com","https://au-syd.ml.cloud.ibm.com","https://jp-tok.ml.cloud.ibm.com","https://ca-tor.ml.cloud.ibm.com"],"options_metadata":[],"combobox":false,"dialog_inputs":{},"toggle":false,"required":false,"placeholder":"","show":true,"name":"url","display_name":"watsonx API Endpoint","advanced":false,"dynamic":false,"info":"The base URL of the API.","real_time_refresh":true,"title_case":false,"external_options":{},"type":"str","_input_type":"DropdownInput"}},"description":"Generate embeddings using IBM watsonx.ai models.","icon":"WatsonxAI","base_classes":["Embeddings"],"display_name":"IBM watsonx.ai Embeddings","documentation":"","minimized":false,"custom_fields":{},"output_types":[],"pinned":false,"conditional_paths":[],"frozen":false,"outputs":[{"types":["Embeddings"],"selected":"Embeddings","name":"embeddings","display_name":"Embedding Model","method":"build_embeddings","value":"__UNDEFINED__","cache":true,"allows_loop":false,"group_outputs":false,"tool_mode":true}],"field_order":["url","project_id","api_key","model_name","truncate_input_tokens","input_text"],"beta":false,"legacy":false,"edited":false,"metadata":{"module":"lfx.components.ibm.watsonx_embeddings.WatsonxEmbeddingsComponent","code_hash":"ffded413ea90","dependencies":{"total_dependencies":5,"dependencies":[{"name":"requests","version":"2.32.5"},{"name":"ibm_watsonx_ai","version":"1.3.34"},{"name":"langchain_ibm","version":"0.3.16"},{"name":"pydantic","version":"2.10.6"},{"name":"lfx","version":null}]}},"tool_mode":false,"official":false},"showNode":true,"type":"WatsonxEmbeddingsComponent","id":"WatsonxEmbeddingsComponent-q67FN"},"id":"WatsonxEmbeddingsComponent-q67FN","position":{"x":0,"y":0},"type":"genericNode"}],"viewport":{"x":1,"y":1,"zoom":1}},"description":"Generate embeddings using IBM watsonx.ai models.","name":"IBM watsonx.ai Embeddings","id":"WatsonxEmbeddingsComponent-q67FN","is_component":true,"last_tested_version":"1.6.0"} \ No newline at end of file diff --git a/flows/components/watsonx_llm.json b/flows/components/watsonx_llm.json index 99a4a936..3d02d1fe 100644 --- a/flows/components/watsonx_llm.json +++ b/flows/components/watsonx_llm.json @@ -1,550 +1,538 @@ { "data": { - "id": "IBMwatsonxModel-jA4Nw", - "node": { - "base_classes": [ - "LanguageModel", - "Message" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Generate text using IBM watsonx.ai foundation models.", - "display_name": "IBM watsonx.ai", - "documentation": "", - "edited": false, - "field_order": [ - "input_value", - "system_message", - "stream", - "url", - "project_id", - "api_key", - "model_name", - "max_tokens", - "stop_sequence", - "temperature", - "top_p", - "frequency_penalty", - "presence_penalty", - "seed", - "logprobs", - "top_logprobs", - "logit_bias" - ], - "frozen": false, - "icon": "WatsonxAI", - "last_updated": "2025-09-22T20:03:31.248Z", - "legacy": false, - "metadata": { - "code_hash": "7767fd69a954", - "dependencies": { - "dependencies": [ - { - "name": "requests", - "version": "2.32.5" + "edges": [], + "nodes": [ + { + "data": { + "node": { + "template": { + "_type": "Component", + "api_key": { + "load_from_db": false, + "required": true, + "placeholder": "", + "show": true, + "name": "api_key", + "value": "", + "display_name": 
"Watsonx API Key", + "advanced": false, + "input_types": [], + "dynamic": false, + "info": "The API Key to use for the model.", + "title_case": false, + "password": true, + "type": "str", + "_input_type": "SecretStrInput" + }, + "code": { + "type": "code", + "required": true, + "placeholder": "", + "list": false, + "show": true, + "multiline": true, + "value": "import json\nfrom typing import Any\n\nimport requests\nfrom langchain_ibm import ChatWatsonx\nfrom pydantic.v1 import SecretStr\n\nfrom lfx.base.models.model import LCModelComponent\nfrom lfx.field_typing import LanguageModel\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, IntInput, SecretStrInput, SliderInput, StrInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dotdict import dotdict\n\n\nclass WatsonxAIComponent(LCModelComponent):\n display_name = \"IBM watsonx.ai\"\n description = \"Generate text using IBM watsonx.ai foundation models.\"\n icon = \"WatsonxAI\"\n name = \"IBMwatsonxModel\"\n beta = False\n\n _default_models = [\"ibm/granite-3-2b-instruct\", \"ibm/granite-3-8b-instruct\", \"ibm/granite-13b-instruct-v2\"]\n\n inputs = [\n *LCModelComponent.get_base_inputs(),\n DropdownInput(\n name=\"url\",\n display_name=\"watsonx API Endpoint\",\n info=\"The base URL of the API.\",\n value=None,\n options=[\n \"https://us-south.ml.cloud.ibm.com\",\n \"https://eu-de.ml.cloud.ibm.com\",\n \"https://eu-gb.ml.cloud.ibm.com\",\n \"https://au-syd.ml.cloud.ibm.com\",\n \"https://jp-tok.ml.cloud.ibm.com\",\n \"https://ca-tor.ml.cloud.ibm.com\",\n ],\n real_time_refresh=True,\n ),\n StrInput(\n name=\"project_id\",\n display_name=\"watsonx Project ID\",\n required=True,\n info=\"The project ID or deployment space ID that is associated with the foundation model.\",\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Watsonx API Key\",\n info=\"The API Key to use for the model.\",\n required=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n value=None,\n dynamic=True,\n required=True,\n ),\n IntInput(\n name=\"max_tokens\",\n display_name=\"Max Tokens\",\n advanced=True,\n info=\"The maximum number of tokens to generate.\",\n range_spec=RangeSpec(min=1, max=4096),\n value=1000,\n ),\n StrInput(\n name=\"stop_sequence\",\n display_name=\"Stop Sequence\",\n advanced=True,\n info=\"Sequence where generation should stop.\",\n field_type=\"str\",\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n info=\"Controls randomness, higher values increase diversity.\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=2, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"top_p\",\n display_name=\"Top P\",\n info=\"The cumulative probability cutoff for token selection. 
\"\n \"Lower values mean sampling from a smaller, more top-weighted nucleus.\",\n value=0.9,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"frequency_penalty\",\n display_name=\"Frequency Penalty\",\n info=\"Penalty for frequency of token usage.\",\n value=0.5,\n range_spec=RangeSpec(min=-2.0, max=2.0, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"presence_penalty\",\n display_name=\"Presence Penalty\",\n info=\"Penalty for token presence in prior text.\",\n value=0.3,\n range_spec=RangeSpec(min=-2.0, max=2.0, step=0.01),\n advanced=True,\n ),\n IntInput(\n name=\"seed\",\n display_name=\"Random Seed\",\n advanced=True,\n info=\"The random seed for the model.\",\n value=8,\n ),\n BoolInput(\n name=\"logprobs\",\n display_name=\"Log Probabilities\",\n advanced=True,\n info=\"Whether to return log probabilities of the output tokens.\",\n value=True,\n ),\n IntInput(\n name=\"top_logprobs\",\n display_name=\"Top Log Probabilities\",\n advanced=True,\n info=\"Number of most likely tokens to return at each position.\",\n value=3,\n range_spec=RangeSpec(min=1, max=20),\n ),\n StrInput(\n name=\"logit_bias\",\n display_name=\"Logit Bias\",\n advanced=True,\n info='JSON string of token IDs to bias or suppress (e.g., {\"1003\": -100, \"1004\": 100}).',\n field_type=\"str\",\n ),\n ]\n\n @staticmethod\n def fetch_models(base_url: str) -> list[str]:\n \"\"\"Fetch available models from the watsonx.ai API.\"\"\"\n try:\n endpoint = f\"{base_url}/ml/v1/foundation_model_specs\"\n params = {\"version\": \"2024-09-16\", \"filters\": \"function_text_chat,!lifecycle_withdrawn\"}\n response = requests.get(endpoint, params=params, timeout=10)\n response.raise_for_status()\n data = response.json()\n models = [model[\"model_id\"] for model in data.get(\"resources\", [])]\n return sorted(models)\n except Exception: # noqa: BLE001\n logger.exception(\"Error fetching models. Using default models.\")\n return WatsonxAIComponent._default_models\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):\n \"\"\"Update model options when URL or API key changes.\"\"\"\n logger.info(\"Updating build config. Field name: %s, Field value: %s\", field_name, field_value)\n\n if field_name == \"url\" and field_value:\n try:\n models = self.fetch_models(base_url=build_config.url.value)\n build_config.model_name.options = models\n if build_config.model_name.value:\n build_config.model_name.value = models[0]\n info_message = f\"Updated model options: {len(models)} models found in {build_config.url.value}\"\n logger.info(info_message)\n except Exception: # noqa: BLE001\n logger.exception(\"Error updating model options.\")\n\n def build_model(self) -> LanguageModel:\n # Parse logit_bias from JSON string if provided\n logit_bias = None\n if hasattr(self, \"logit_bias\") and self.logit_bias:\n try:\n logit_bias = json.loads(self.logit_bias)\n except json.JSONDecodeError:\n logger.warning(\"Invalid logit_bias JSON format. 
Using default instead.\")\n logit_bias = {\"1003\": -100, \"1004\": -100}\n\n chat_params = {\n \"max_tokens\": getattr(self, \"max_tokens\", None),\n \"temperature\": getattr(self, \"temperature\", None),\n \"top_p\": getattr(self, \"top_p\", None),\n \"frequency_penalty\": getattr(self, \"frequency_penalty\", None),\n \"presence_penalty\": getattr(self, \"presence_penalty\", None),\n \"seed\": getattr(self, \"seed\", None),\n \"stop\": [self.stop_sequence] if self.stop_sequence else [],\n \"n\": 1,\n \"logprobs\": getattr(self, \"logprobs\", True),\n \"top_logprobs\": getattr(self, \"top_logprobs\", None),\n \"time_limit\": 600000,\n \"logit_bias\": logit_bias,\n }\n\n return ChatWatsonx(\n apikey=SecretStr(self.api_key).get_secret_value(),\n url=self.url,\n project_id=self.project_id,\n model_id=self.model_name,\n params=chat_params,\n streaming=self.stream,\n )\n", + "fileTypes": [], + "file_path": "", + "password": false, + "name": "code", + "advanced": true, + "dynamic": true, + "info": "", + "load_from_db": false, + "title_case": false + }, + "frequency_penalty": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": -2, + "max": 2, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "frequency_penalty", + "value": 0.5, + "display_name": "Frequency Penalty", + "advanced": true, + "dynamic": false, + "info": "Penalty for frequency of token usage.", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "input_value": { + "trace_as_input": true, + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "input_value", + "value": "", + "display_name": "Input", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "", + "title_case": false, + "type": "str", + "_input_type": "MessageInput" + }, + "logit_bias": { + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "logit_bias", + "value": "", + "display_name": "Logit Bias", + "advanced": true, + "dynamic": false, + "info": "JSON string of token IDs to bias or suppress (e.g., {\"1003\": -100, \"1004\": 100}).", + "title_case": false, + "type": "str", + "_input_type": "StrInput" + }, + "logprobs": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "logprobs", + "value": true, + "display_name": "Log Probabilities", + "advanced": true, + "dynamic": false, + "info": "Whether to return log probabilities of the output tokens.", + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + }, + "max_tokens": { + "tool_mode": false, + "trace_as_metadata": true, + "range_spec": { + "step_type": "float", + "min": 1, + "max": 4096, + "step": 0.1 + }, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "max_tokens", + "value": 1000, + "display_name": "Max Tokens", + "advanced": true, + "dynamic": false, + "info": "The maximum number of tokens to generate.", + "title_case": false, + "type": "int", + 
"_input_type": "IntInput" + }, + "model_name": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [], + "options_metadata": [], + "combobox": false, + "dialog_inputs": {}, + "toggle": false, + "required": true, + "placeholder": "", + "show": true, + "name": "model_name", + "display_name": "Model Name", + "advanced": false, + "dynamic": true, + "info": "", + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + }, + "presence_penalty": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": -2, + "max": 2, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "presence_penalty", + "value": 0.3, + "display_name": "Presence Penalty", + "advanced": true, + "dynamic": false, + "info": "Penalty for token presence in prior text.", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "project_id": { + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": true, + "placeholder": "", + "show": true, + "name": "project_id", + "value": "", + "display_name": "watsonx Project ID", + "advanced": false, + "dynamic": false, + "info": "The project ID or deployment space ID that is associated with the foundation model.", + "title_case": false, + "type": "str", + "_input_type": "StrInput" + }, + "seed": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "seed", + "value": 8, + "display_name": "Random Seed", + "advanced": true, + "dynamic": false, + "info": "The random seed for the model.", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "stop_sequence": { + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "stop_sequence", + "value": "", + "display_name": "Stop Sequence", + "advanced": true, + "dynamic": false, + "info": "Sequence where generation should stop.", + "title_case": false, + "type": "str", + "_input_type": "StrInput" + }, + "stream": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "stream", + "value": false, + "display_name": "Stream", + "advanced": true, + "dynamic": false, + "info": "Stream the response from the model. 
Streaming works only in Chat.", + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + }, + "system_message": { + "tool_mode": false, + "trace_as_input": true, + "multiline": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "system_message", + "value": "", + "display_name": "System Message", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "System message to pass to the model.", + "title_case": false, + "copy_field": false, + "type": "str", + "_input_type": "MultilineInput" + }, + "temperature": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": 0, + "max": 2, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "temperature", + "value": 0.1, + "display_name": "Temperature", + "advanced": true, + "dynamic": false, + "info": "Controls randomness, higher values increase diversity.", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "top_logprobs": { + "tool_mode": false, + "trace_as_metadata": true, + "range_spec": { + "step_type": "float", + "min": 1, + "max": 20, + "step": 0.1 + }, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "top_logprobs", + "value": 3, + "display_name": "Top Log Probabilities", + "advanced": true, + "dynamic": false, + "info": "Number of most likely tokens to return at each position.", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "top_p": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": 0, + "max": 1, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "top_p", + "value": 0.9, + "display_name": "Top P", + "advanced": true, + "dynamic": false, + "info": "The cumulative probability cutoff for token selection. 
Lower values mean sampling from a smaller, more top-weighted nucleus.", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "url": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [ + "https://us-south.ml.cloud.ibm.com", + "https://eu-de.ml.cloud.ibm.com", + "https://eu-gb.ml.cloud.ibm.com", + "https://au-syd.ml.cloud.ibm.com", + "https://jp-tok.ml.cloud.ibm.com", + "https://ca-tor.ml.cloud.ibm.com" + ], + "options_metadata": [], + "combobox": false, + "dialog_inputs": {}, + "toggle": false, + "required": false, + "placeholder": "", + "show": true, + "name": "url", + "display_name": "watsonx API Endpoint", + "advanced": false, + "dynamic": false, + "info": "The base URL of the API.", + "real_time_refresh": true, + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + } }, - { - "name": "langchain_ibm", - "version": "0.3.16" + "description": "Generate text using IBM watsonx.ai foundation models.", + "icon": "WatsonxAI", + "base_classes": [ + "LanguageModel", + "Message" + ], + "display_name": "IBM watsonx.ai", + "documentation": "", + "minimized": false, + "custom_fields": {}, + "output_types": [], + "pinned": false, + "conditional_paths": [], + "frozen": false, + "outputs": [ + { + "types": [ + "Message" + ], + "name": "text_output", + "display_name": "Model Response", + "method": "text_response", + "value": "__UNDEFINED__", + "cache": true, + "allows_loop": false, + "group_outputs": false, + "tool_mode": true + }, + { + "types": [ + "LanguageModel" + ], + "selected": "LanguageModel", + "name": "model_output", + "display_name": "Language Model", + "method": "build_model", + "value": "__UNDEFINED__", + "cache": true, + "allows_loop": false, + "group_outputs": false, + "tool_mode": true + } + ], + "field_order": [ + "input_value", + "system_message", + "stream", + "url", + "project_id", + "api_key", + "model_name", + "max_tokens", + "stop_sequence", + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "seed", + "logprobs", + "top_logprobs", + "logit_bias" + ], + "beta": false, + "legacy": false, + "edited": false, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ], + "module": "lfx.components.ibm.watsonx.WatsonxAIComponent", + "code_hash": "85c24939214c", + "dependencies": { + "total_dependencies": 4, + "dependencies": [ + { + "name": "requests", + "version": "2.32.5" + }, + { + "name": "langchain_ibm", + "version": "0.3.16" + }, + { + "name": "pydantic", + "version": "2.10.6" + }, + { + "name": "lfx", + "version": null + } + ] + } }, - { - "name": "pydantic", - "version": "2.10.6" - }, - { - "name": "langflow", - "version": null - } - ], - "total_dependencies": 4 - }, - "keywords": [ - "model", - "llm", - "language model", - "large language model" - ], - "module": "langflow.components.ibm.watsonx.WatsonxAIComponent" - }, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Model Response", - "group_outputs": false, - "method": "text_response", - "name": "text_output", - "options": null, - "required_inputs": null, - "tool_mode": true, - "types": [ - "Message" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": "Language Model", - "group_outputs": false, - "method": "build_model", - "name": "model_output", - "options": null, - "required_inputs": null, - "selected": "LanguageModel", - "tool_mode": true, - "types": [ - 
"LanguageModel" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "api_key": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "API Key", - "dynamic": false, - "info": "The API Key to use for the model.", - "input_types": [], - "load_from_db": true, - "name": "api_key", - "password": true, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "str", - "value": "WATSONX_API_KEY" - }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "import json\nfrom typing import Any\n\nimport requests\nfrom langchain_ibm import ChatWatsonx\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput, DropdownInput, IntInput, SecretStrInput, SliderInput, StrInput\nfrom langflow.logging.logger import logger\nfrom langflow.schema.dotdict import dotdict\n\n\nclass WatsonxAIComponent(LCModelComponent):\n display_name = \"IBM watsonx.ai\"\n description = \"Generate text using IBM watsonx.ai foundation models.\"\n icon = \"WatsonxAI\"\n name = \"IBMwatsonxModel\"\n beta = False\n\n _default_models = [\"ibm/granite-3-2b-instruct\", \"ibm/granite-3-8b-instruct\", \"ibm/granite-13b-instruct-v2\"]\n\n inputs = [\n *LCModelComponent._base_inputs,\n DropdownInput(\n name=\"url\",\n display_name=\"watsonx API Endpoint\",\n info=\"The base URL of the API.\",\n value=None,\n options=[\n \"https://us-south.ml.cloud.ibm.com\",\n \"https://eu-de.ml.cloud.ibm.com\",\n \"https://eu-gb.ml.cloud.ibm.com\",\n \"https://au-syd.ml.cloud.ibm.com\",\n \"https://jp-tok.ml.cloud.ibm.com\",\n \"https://ca-tor.ml.cloud.ibm.com\",\n ],\n real_time_refresh=True,\n ),\n StrInput(\n name=\"project_id\",\n display_name=\"watsonx Project ID\",\n required=True,\n info=\"The project ID or deployment space ID that is associated with the foundation model.\",\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"The API Key to use for the model.\",\n required=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n value=None,\n dynamic=True,\n required=True,\n ),\n IntInput(\n name=\"max_tokens\",\n display_name=\"Max Tokens\",\n advanced=True,\n info=\"The maximum number of tokens to generate.\",\n range_spec=RangeSpec(min=1, max=4096),\n value=1000,\n ),\n StrInput(\n name=\"stop_sequence\",\n display_name=\"Stop Sequence\",\n advanced=True,\n info=\"Sequence where generation should stop.\",\n field_type=\"str\",\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n info=\"Controls randomness, higher values increase diversity.\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=2, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"top_p\",\n display_name=\"Top P\",\n info=\"The cumulative probability cutoff for token selection. 
\"\n \"Lower values mean sampling from a smaller, more top-weighted nucleus.\",\n value=0.9,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"frequency_penalty\",\n display_name=\"Frequency Penalty\",\n info=\"Penalty for frequency of token usage.\",\n value=0.5,\n range_spec=RangeSpec(min=-2.0, max=2.0, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"presence_penalty\",\n display_name=\"Presence Penalty\",\n info=\"Penalty for token presence in prior text.\",\n value=0.3,\n range_spec=RangeSpec(min=-2.0, max=2.0, step=0.01),\n advanced=True,\n ),\n IntInput(\n name=\"seed\",\n display_name=\"Random Seed\",\n advanced=True,\n info=\"The random seed for the model.\",\n value=8,\n ),\n BoolInput(\n name=\"logprobs\",\n display_name=\"Log Probabilities\",\n advanced=True,\n info=\"Whether to return log probabilities of the output tokens.\",\n value=True,\n ),\n IntInput(\n name=\"top_logprobs\",\n display_name=\"Top Log Probabilities\",\n advanced=True,\n info=\"Number of most likely tokens to return at each position.\",\n value=3,\n range_spec=RangeSpec(min=1, max=20),\n ),\n StrInput(\n name=\"logit_bias\",\n display_name=\"Logit Bias\",\n advanced=True,\n info='JSON string of token IDs to bias or suppress (e.g., {\"1003\": -100, \"1004\": 100}).',\n field_type=\"str\",\n ),\n ]\n\n @staticmethod\n def fetch_models(base_url: str) -> list[str]:\n \"\"\"Fetch available models from the watsonx.ai API.\"\"\"\n try:\n endpoint = f\"{base_url}/ml/v1/foundation_model_specs\"\n params = {\"version\": \"2024-09-16\", \"filters\": \"function_text_chat,!lifecycle_withdrawn\"}\n response = requests.get(endpoint, params=params, timeout=10)\n response.raise_for_status()\n data = response.json()\n models = [model[\"model_id\"] for model in data.get(\"resources\", [])]\n return sorted(models)\n except Exception: # noqa: BLE001\n logger.exception(\"Error fetching models. Using default models.\")\n return WatsonxAIComponent._default_models\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):\n \"\"\"Update model options when URL or API key changes.\"\"\"\n logger.info(\"Updating build config. Field name: %s, Field value: %s\", field_name, field_value)\n\n if field_name == \"url\" and field_value:\n try:\n models = self.fetch_models(base_url=build_config.url.value)\n build_config.model_name.options = models\n if build_config.model_name.value:\n build_config.model_name.value = models[0]\n info_message = f\"Updated model options: {len(models)} models found in {build_config.url.value}\"\n logger.info(info_message)\n except Exception: # noqa: BLE001\n logger.exception(\"Error updating model options.\")\n\n def build_model(self) -> LanguageModel:\n # Parse logit_bias from JSON string if provided\n logit_bias = None\n if hasattr(self, \"logit_bias\") and self.logit_bias:\n try:\n logit_bias = json.loads(self.logit_bias)\n except json.JSONDecodeError:\n logger.warning(\"Invalid logit_bias JSON format. 
Using default instead.\")\n logit_bias = {\"1003\": -100, \"1004\": -100}\n\n chat_params = {\n \"max_tokens\": getattr(self, \"max_tokens\", None),\n \"temperature\": getattr(self, \"temperature\", None),\n \"top_p\": getattr(self, \"top_p\", None),\n \"frequency_penalty\": getattr(self, \"frequency_penalty\", None),\n \"presence_penalty\": getattr(self, \"presence_penalty\", None),\n \"seed\": getattr(self, \"seed\", None),\n \"stop\": [self.stop_sequence] if self.stop_sequence else [],\n \"n\": 1,\n \"logprobs\": getattr(self, \"logprobs\", True),\n \"top_logprobs\": getattr(self, \"top_logprobs\", None),\n \"time_limit\": 600000,\n \"logit_bias\": logit_bias,\n }\n\n return ChatWatsonx(\n apikey=SecretStr(self.api_key).get_secret_value(),\n url=self.url,\n project_id=self.project_id,\n model_id=self.model_name,\n params=chat_params,\n streaming=self.stream,\n )\n" - }, - "frequency_penalty": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Frequency Penalty", - "dynamic": false, - "info": "Penalty for frequency of token usage.", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "frequency_penalty", - "placeholder": "", - "range_spec": { - "max": 2, - "min": -2, - "step": 0.01, - "step_type": "float" + "tool_mode": false, + "official": false }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.5 + "showNode": true, + "type": "IBMwatsonxModel", + "id": "IBMwatsonxModel-qXZxc", + "selected_output": "model_output" }, - "input_value": { - "_input_type": "MessageInput", - "advanced": false, - "display_name": "Input", - "dynamic": false, - "info": "", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "input_value", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" + "id": "IBMwatsonxModel-qXZxc", + "position": { + "x": 0, + "y": 0 }, - "logit_bias": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Logit Bias", - "dynamic": false, - "info": "JSON string of token IDs to bias or suppress (e.g., {\"1003\": -100, \"1004\": 100}).", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "logit_bias", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "logprobs": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Log Probabilities", - "dynamic": false, - "info": "Whether to return log probabilities of the output tokens.", - "list": false, - "list_add_label": "Add More", - "name": "logprobs", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "max_tokens": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Max Tokens", - "dynamic": false, - "info": "The maximum number of tokens to generate.", - "list": false, - "list_add_label": "Add More", - "name": "max_tokens", - "placeholder": "", - "range_spec": { - "max": 4096, - "min": 1, - "step": 0.1, - "step_type": "float" - }, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - 
"trace_as_metadata": true, - "type": "int", - "value": 1000 - }, - "model_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Name", - "dynamic": true, - "info": "", - "name": "model_name", - "options": [ - "ibm/granite-3-2-8b-instruct", - "ibm/granite-3-2b-instruct", - "ibm/granite-3-3-8b-instruct", - "ibm/granite-3-8b-instruct", - "ibm/granite-guardian-3-2b", - "ibm/granite-guardian-3-8b", - "ibm/granite-vision-3-2-2b", - "meta-llama/llama-3-2-11b-vision-instruct", - "meta-llama/llama-3-2-90b-vision-instruct", - "meta-llama/llama-3-3-70b-instruct", - "meta-llama/llama-3-405b-instruct", - "meta-llama/llama-4-maverick-17b-128e-instruct-fp8", - "meta-llama/llama-guard-3-11b-vision", - "mistralai/mistral-large", - "mistralai/mistral-medium-2505", - "mistralai/mistral-small-3-1-24b-instruct-2503", - "mistralai/pixtral-12b", - "openai/gpt-oss-120b" - ], - "options_metadata": [], - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "ibm/granite-3-2-8b-instruct" - }, - "presence_penalty": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Presence Penalty", - "dynamic": false, - "info": "Penalty for token presence in prior text.", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "presence_penalty", - "placeholder": "", - "range_spec": { - "max": 2, - "min": -2, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.3 - }, - "project_id": { - "_input_type": "StrInput", - "advanced": false, - "display_name": "watsonx Project ID", - "dynamic": false, - "info": "The project ID or deployment space ID that is associated with the foundation model.", - "list": false, - "list_add_label": "Add More", - "load_from_db": true, - "name": "project_id", - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "WATSONX_PROJECT_ID" - }, - "seed": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Random Seed", - "dynamic": false, - "info": "The random seed for the model.", - "list": false, - "list_add_label": "Add More", - "name": "seed", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 8 - }, - "stop_sequence": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Stop Sequence", - "dynamic": false, - "info": "Sequence where generation should stop.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "stop_sequence", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "stream": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Stream", - "dynamic": false, - "info": "Stream the response from the model. 
Streaming works only in Chat.", - "list": false, - "list_add_label": "Add More", - "name": "stream", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "system_message": { - "_input_type": "MultilineInput", - "advanced": false, - "copy_field": false, - "display_name": "System Message", - "dynamic": false, - "info": "System message to pass to the model.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "system_message", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "temperature": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Temperature", - "dynamic": false, - "info": "Controls randomness, higher values increase diversity.", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "temperature", - "placeholder": "", - "range_spec": { - "max": 2, - "min": 0, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.1 - }, - "top_logprobs": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Top Log Probabilities", - "dynamic": false, - "info": "Number of most likely tokens to return at each position.", - "list": false, - "list_add_label": "Add More", - "name": "top_logprobs", - "placeholder": "", - "range_spec": { - "max": 20, - "min": 1, - "step": 0.1, - "step_type": "float" - }, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 3 - }, - "top_p": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Top P", - "dynamic": false, - "info": "The cumulative probability cutoff for token selection. 
Lower values mean sampling from a smaller, more top-weighted nucleus.", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "top_p", - "placeholder": "", - "range_spec": { - "max": 1, - "min": 0, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.9 - }, - "url": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "watsonx API Endpoint", - "dynamic": false, - "info": "The base URL of the API.", - "name": "url", - "options": [ - "https://us-south.ml.cloud.ibm.com", - "https://eu-de.ml.cloud.ibm.com", - "https://eu-gb.ml.cloud.ibm.com", - "https://au-syd.ml.cloud.ibm.com", - "https://jp-tok.ml.cloud.ibm.com", - "https://ca-tor.ml.cloud.ibm.com" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "https://us-south.ml.cloud.ibm.com" - } - }, - "tool_mode": false - }, - "selected_output": "model_output", - "showNode": true, - "type": "IBMwatsonxModel" + "type": "genericNode" + } + ], + "viewport": { + "x": 1, + "y": 1, + "zoom": 1 + } }, - "dragging": false, - "id": "IBMwatsonxModel-jA4Nw", - "measured": { - "height": 632, - "width": 320 - }, - "position": { - "x": 371.93566807042805, - "y": 197.47711431325635 - }, - "selected": false, - "type": "genericNode" + "description": "Generate text using IBM watsonx.ai foundation models.", + "name": "IBM watsonx.ai", + "id": "IBMwatsonxModel-qXZxc", + "is_component": true, + "last_tested_version": "1.6.0" } \ No newline at end of file diff --git a/flows/components/watsonx_llm_text.json b/flows/components/watsonx_llm_text.json index a2966a48..a1116bf1 100644 --- a/flows/components/watsonx_llm_text.json +++ b/flows/components/watsonx_llm_text.json @@ -1,551 +1,538 @@ { - "data": { - "id": "IBMwatsonxModel-18kmA", - "node": { - "base_classes": [ - "LanguageModel", - "Message" - ], - "beta": false, - "conditional_paths": [], - "custom_fields": {}, - "description": "Generate text using IBM watsonx.ai foundation models.", - "display_name": "IBM watsonx.ai", - "documentation": "", - "edited": false, - "field_order": [ - "input_value", - "system_message", - "stream", - "url", - "project_id", - "api_key", - "model_name", - "max_tokens", - "stop_sequence", - "temperature", - "top_p", - "frequency_penalty", - "presence_penalty", - "seed", - "logprobs", - "top_logprobs", - "logit_bias" - ], - "frozen": false, - "icon": "WatsonxAI", - "last_updated": "2025-09-22T20:03:31.248Z", - "legacy": false, - "metadata": { - "code_hash": "7767fd69a954", - "dependencies": { - "dependencies": [ + "data": { + "edges": [], + "nodes": [ + { + "data": { + "node": { + "template": { + "_type": "Component", + "api_key": { + "load_from_db": false, + "required": true, + "placeholder": "", + "show": true, + "name": "api_key", + "value": "", + "display_name": "Watsonx API Key", + "advanced": false, + "input_types": [], + "dynamic": false, + "info": "The API Key to use for the model.", + "title_case": false, + "password": true, + "type": "str", + "_input_type": "SecretStrInput" + }, + "code": { + "type": "code", + "required": true, + "placeholder": "", + "list": false, + "show": true, + "multiline": true, + 
"value": "import json\nfrom typing import Any\n\nimport requests\nfrom langchain_ibm import ChatWatsonx\nfrom pydantic.v1 import SecretStr\n\nfrom lfx.base.models.model import LCModelComponent\nfrom lfx.field_typing import LanguageModel\nfrom lfx.field_typing.range_spec import RangeSpec\nfrom lfx.inputs.inputs import BoolInput, DropdownInput, IntInput, SecretStrInput, SliderInput, StrInput\nfrom lfx.log.logger import logger\nfrom lfx.schema.dotdict import dotdict\n\n\nclass WatsonxAIComponent(LCModelComponent):\n display_name = \"IBM watsonx.ai\"\n description = \"Generate text using IBM watsonx.ai foundation models.\"\n icon = \"WatsonxAI\"\n name = \"IBMwatsonxModel\"\n beta = False\n\n _default_models = [\"ibm/granite-3-2b-instruct\", \"ibm/granite-3-8b-instruct\", \"ibm/granite-13b-instruct-v2\"]\n\n inputs = [\n *LCModelComponent.get_base_inputs(),\n DropdownInput(\n name=\"url\",\n display_name=\"watsonx API Endpoint\",\n info=\"The base URL of the API.\",\n value=None,\n options=[\n \"https://us-south.ml.cloud.ibm.com\",\n \"https://eu-de.ml.cloud.ibm.com\",\n \"https://eu-gb.ml.cloud.ibm.com\",\n \"https://au-syd.ml.cloud.ibm.com\",\n \"https://jp-tok.ml.cloud.ibm.com\",\n \"https://ca-tor.ml.cloud.ibm.com\",\n ],\n real_time_refresh=True,\n ),\n StrInput(\n name=\"project_id\",\n display_name=\"watsonx Project ID\",\n required=True,\n info=\"The project ID or deployment space ID that is associated with the foundation model.\",\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"Watsonx API Key\",\n info=\"The API Key to use for the model.\",\n required=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n value=None,\n dynamic=True,\n required=True,\n ),\n IntInput(\n name=\"max_tokens\",\n display_name=\"Max Tokens\",\n advanced=True,\n info=\"The maximum number of tokens to generate.\",\n range_spec=RangeSpec(min=1, max=4096),\n value=1000,\n ),\n StrInput(\n name=\"stop_sequence\",\n display_name=\"Stop Sequence\",\n advanced=True,\n info=\"Sequence where generation should stop.\",\n field_type=\"str\",\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n info=\"Controls randomness, higher values increase diversity.\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=2, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"top_p\",\n display_name=\"Top P\",\n info=\"The cumulative probability cutoff for token selection. 
\"\n \"Lower values mean sampling from a smaller, more top-weighted nucleus.\",\n value=0.9,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"frequency_penalty\",\n display_name=\"Frequency Penalty\",\n info=\"Penalty for frequency of token usage.\",\n value=0.5,\n range_spec=RangeSpec(min=-2.0, max=2.0, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"presence_penalty\",\n display_name=\"Presence Penalty\",\n info=\"Penalty for token presence in prior text.\",\n value=0.3,\n range_spec=RangeSpec(min=-2.0, max=2.0, step=0.01),\n advanced=True,\n ),\n IntInput(\n name=\"seed\",\n display_name=\"Random Seed\",\n advanced=True,\n info=\"The random seed for the model.\",\n value=8,\n ),\n BoolInput(\n name=\"logprobs\",\n display_name=\"Log Probabilities\",\n advanced=True,\n info=\"Whether to return log probabilities of the output tokens.\",\n value=True,\n ),\n IntInput(\n name=\"top_logprobs\",\n display_name=\"Top Log Probabilities\",\n advanced=True,\n info=\"Number of most likely tokens to return at each position.\",\n value=3,\n range_spec=RangeSpec(min=1, max=20),\n ),\n StrInput(\n name=\"logit_bias\",\n display_name=\"Logit Bias\",\n advanced=True,\n info='JSON string of token IDs to bias or suppress (e.g., {\"1003\": -100, \"1004\": 100}).',\n field_type=\"str\",\n ),\n ]\n\n @staticmethod\n def fetch_models(base_url: str) -> list[str]:\n \"\"\"Fetch available models from the watsonx.ai API.\"\"\"\n try:\n endpoint = f\"{base_url}/ml/v1/foundation_model_specs\"\n params = {\"version\": \"2024-09-16\", \"filters\": \"function_text_chat,!lifecycle_withdrawn\"}\n response = requests.get(endpoint, params=params, timeout=10)\n response.raise_for_status()\n data = response.json()\n models = [model[\"model_id\"] for model in data.get(\"resources\", [])]\n return sorted(models)\n except Exception: # noqa: BLE001\n logger.exception(\"Error fetching models. Using default models.\")\n return WatsonxAIComponent._default_models\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):\n \"\"\"Update model options when URL or API key changes.\"\"\"\n logger.info(\"Updating build config. Field name: %s, Field value: %s\", field_name, field_value)\n\n if field_name == \"url\" and field_value:\n try:\n models = self.fetch_models(base_url=build_config.url.value)\n build_config.model_name.options = models\n if build_config.model_name.value:\n build_config.model_name.value = models[0]\n info_message = f\"Updated model options: {len(models)} models found in {build_config.url.value}\"\n logger.info(info_message)\n except Exception: # noqa: BLE001\n logger.exception(\"Error updating model options.\")\n\n def build_model(self) -> LanguageModel:\n # Parse logit_bias from JSON string if provided\n logit_bias = None\n if hasattr(self, \"logit_bias\") and self.logit_bias:\n try:\n logit_bias = json.loads(self.logit_bias)\n except json.JSONDecodeError:\n logger.warning(\"Invalid logit_bias JSON format. 
Using default instead.\")\n logit_bias = {\"1003\": -100, \"1004\": -100}\n\n chat_params = {\n \"max_tokens\": getattr(self, \"max_tokens\", None),\n \"temperature\": getattr(self, \"temperature\", None),\n \"top_p\": getattr(self, \"top_p\", None),\n \"frequency_penalty\": getattr(self, \"frequency_penalty\", None),\n \"presence_penalty\": getattr(self, \"presence_penalty\", None),\n \"seed\": getattr(self, \"seed\", None),\n \"stop\": [self.stop_sequence] if self.stop_sequence else [],\n \"n\": 1,\n \"logprobs\": getattr(self, \"logprobs\", True),\n \"top_logprobs\": getattr(self, \"top_logprobs\", None),\n \"time_limit\": 600000,\n \"logit_bias\": logit_bias,\n }\n\n return ChatWatsonx(\n apikey=SecretStr(self.api_key).get_secret_value(),\n url=self.url,\n project_id=self.project_id,\n model_id=self.model_name,\n params=chat_params,\n streaming=self.stream,\n )\n", + "fileTypes": [], + "file_path": "", + "password": false, + "name": "code", + "advanced": true, + "dynamic": true, + "info": "", + "load_from_db": false, + "title_case": false + }, + "frequency_penalty": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": -2, + "max": 2, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "frequency_penalty", + "value": 0.5, + "display_name": "Frequency Penalty", + "advanced": true, + "dynamic": false, + "info": "Penalty for frequency of token usage.", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "input_value": { + "trace_as_input": true, + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "input_value", + "value": "", + "display_name": "Input", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "", + "title_case": false, + "type": "str", + "_input_type": "MessageInput" + }, + "logit_bias": { + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "logit_bias", + "value": "", + "display_name": "Logit Bias", + "advanced": true, + "dynamic": false, + "info": "JSON string of token IDs to bias or suppress (e.g., {\"1003\": -100, \"1004\": 100}).", + "title_case": false, + "type": "str", + "_input_type": "StrInput" + }, + "logprobs": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "logprobs", + "value": true, + "display_name": "Log Probabilities", + "advanced": true, + "dynamic": false, + "info": "Whether to return log probabilities of the output tokens.", + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + }, + "max_tokens": { + "tool_mode": false, + "trace_as_metadata": true, + "range_spec": { + "step_type": "float", + "min": 1, + "max": 4096, + "step": 0.1 + }, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "max_tokens", + "value": 1000, + "display_name": "Max Tokens", + "advanced": true, + "dynamic": false, + "info": "The maximum number of tokens to generate.", + "title_case": false, + "type": "int", + 
"_input_type": "IntInput" + }, + "model_name": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [], + "options_metadata": [], + "combobox": false, + "dialog_inputs": {}, + "toggle": false, + "required": true, + "placeholder": "", + "show": true, + "name": "model_name", + "display_name": "Model Name", + "advanced": false, + "dynamic": true, + "info": "", + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + }, + "presence_penalty": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": -2, + "max": 2, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "presence_penalty", + "value": 0.3, + "display_name": "Presence Penalty", + "advanced": true, + "dynamic": false, + "info": "Penalty for token presence in prior text.", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "project_id": { + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": true, + "placeholder": "", + "show": true, + "name": "project_id", + "value": "", + "display_name": "watsonx Project ID", + "advanced": false, + "dynamic": false, + "info": "The project ID or deployment space ID that is associated with the foundation model.", + "title_case": false, + "type": "str", + "_input_type": "StrInput" + }, + "seed": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "seed", + "value": 8, + "display_name": "Random Seed", + "advanced": true, + "dynamic": false, + "info": "The random seed for the model.", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "stop_sequence": { + "tool_mode": false, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "stop_sequence", + "value": "", + "display_name": "Stop Sequence", + "advanced": true, + "dynamic": false, + "info": "Sequence where generation should stop.", + "title_case": false, + "type": "str", + "_input_type": "StrInput" + }, + "stream": { + "tool_mode": false, + "trace_as_metadata": true, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "stream", + "value": false, + "display_name": "Stream", + "advanced": true, + "dynamic": false, + "info": "Stream the response from the model. 
Streaming works only in Chat.", + "title_case": false, + "type": "bool", + "_input_type": "BoolInput" + }, + "system_message": { + "tool_mode": false, + "trace_as_input": true, + "multiline": true, + "trace_as_metadata": true, + "load_from_db": false, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "system_message", + "value": "", + "display_name": "System Message", + "advanced": false, + "input_types": [ + "Message" + ], + "dynamic": false, + "info": "System message to pass to the model.", + "title_case": false, + "copy_field": false, + "type": "str", + "_input_type": "MultilineInput" + }, + "temperature": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": 0, + "max": 2, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "temperature", + "value": 0.1, + "display_name": "Temperature", + "advanced": true, + "dynamic": false, + "info": "Controls randomness, higher values increase diversity.", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "top_logprobs": { + "tool_mode": false, + "trace_as_metadata": true, + "range_spec": { + "step_type": "float", + "min": 1, + "max": 20, + "step": 0.1 + }, + "list": false, + "list_add_label": "Add More", + "required": false, + "placeholder": "", + "show": true, + "name": "top_logprobs", + "value": 3, + "display_name": "Top Log Probabilities", + "advanced": true, + "dynamic": false, + "info": "Number of most likely tokens to return at each position.", + "title_case": false, + "type": "int", + "_input_type": "IntInput" + }, + "top_p": { + "tool_mode": false, + "min_label": "", + "max_label": "", + "min_label_icon": "", + "max_label_icon": "", + "slider_buttons": false, + "slider_buttons_options": [], + "slider_input": false, + "range_spec": { + "step_type": "float", + "min": 0, + "max": 1, + "step": 0.01 + }, + "required": false, + "placeholder": "", + "show": true, + "name": "top_p", + "value": 0.9, + "display_name": "Top P", + "advanced": true, + "dynamic": false, + "info": "The cumulative probability cutoff for token selection. 
Lower values mean sampling from a smaller, more top-weighted nucleus.", + "title_case": false, + "type": "slider", + "_input_type": "SliderInput" + }, + "url": { + "tool_mode": false, + "trace_as_metadata": true, + "options": [ + "https://us-south.ml.cloud.ibm.com", + "https://eu-de.ml.cloud.ibm.com", + "https://eu-gb.ml.cloud.ibm.com", + "https://au-syd.ml.cloud.ibm.com", + "https://jp-tok.ml.cloud.ibm.com", + "https://ca-tor.ml.cloud.ibm.com" + ], + "options_metadata": [], + "combobox": false, + "dialog_inputs": {}, + "toggle": false, + "required": false, + "placeholder": "", + "show": true, + "name": "url", + "display_name": "watsonx API Endpoint", + "advanced": false, + "dynamic": false, + "info": "The base URL of the API.", + "real_time_refresh": true, + "title_case": false, + "external_options": {}, + "type": "str", + "_input_type": "DropdownInput" + } + }, + "description": "Generate text using IBM watsonx.ai foundation models.", + "icon": "WatsonxAI", + "base_classes": [ + "LanguageModel", + "Message" + ], + "display_name": "IBM watsonx.ai", + "documentation": "", + "minimized": false, + "custom_fields": {}, + "output_types": [], + "pinned": false, + "conditional_paths": [], + "frozen": false, + "outputs": [ { - "name": "requests", - "version": "2.32.5" + "types": [ + "Message" + ], + "selected": "Message", + "name": "text_output", + "display_name": "Model Response", + "method": "text_response", + "value": "__UNDEFINED__", + "cache": true, + "allows_loop": false, + "group_outputs": false, + "tool_mode": true }, { - "name": "langchain_ibm", - "version": "0.3.16" - }, - { - "name": "pydantic", - "version": "2.10.6" - }, - { - "name": "langflow", - "version": null + "types": [ + "LanguageModel" + ], + "name": "model_output", + "display_name": "Language Model", + "method": "build_model", + "value": "__UNDEFINED__", + "cache": true, + "allows_loop": false, + "group_outputs": false, + "tool_mode": true } ], - "total_dependencies": 4 + "field_order": [ + "input_value", + "system_message", + "stream", + "url", + "project_id", + "api_key", + "model_name", + "max_tokens", + "stop_sequence", + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "seed", + "logprobs", + "top_logprobs", + "logit_bias" + ], + "beta": false, + "legacy": false, + "edited": false, + "metadata": { + "keywords": [ + "model", + "llm", + "language model", + "large language model" + ], + "module": "lfx.components.ibm.watsonx.WatsonxAIComponent", + "code_hash": "85c24939214c", + "dependencies": { + "total_dependencies": 4, + "dependencies": [ + { + "name": "requests", + "version": "2.32.5" + }, + { + "name": "langchain_ibm", + "version": "0.3.16" + }, + { + "name": "pydantic", + "version": "2.10.6" + }, + { + "name": "lfx", + "version": null + } + ] + } + }, + "tool_mode": false, + "official": false }, - "keywords": [ - "model", - "llm", - "language model", - "large language model" - ], - "module": "langflow.components.ibm.watsonx.WatsonxAIComponent" + "showNode": true, + "type": "IBMwatsonxModel", + "id": "IBMwatsonxModel-qXZxc", + "selected_output": "text_output" }, - "minimized": false, - "output_types": [], - "outputs": [ - { - "allows_loop": false, - "cache": true, - "display_name": "Model Response", - "group_outputs": false, - "method": "text_response", - "name": "text_output", - "options": null, - "required_inputs": null, - "selected": "Message", - "tool_mode": true, - "types": [ - "Message" - ], - "value": "__UNDEFINED__" - }, - { - "allows_loop": false, - "cache": true, - "display_name": 
"Language Model", - "group_outputs": false, - "method": "build_model", - "name": "model_output", - "options": null, - "required_inputs": null, - "selected": "LanguageModel", - "tool_mode": true, - "types": [ - "LanguageModel" - ], - "value": "__UNDEFINED__" - } - ], - "pinned": false, - "template": { - "_type": "Component", - "api_key": { - "_input_type": "SecretStrInput", - "advanced": false, - "display_name": "API Key", - "dynamic": false, - "info": "The API Key to use for the model.", - "input_types": [], - "load_from_db": true, - "name": "api_key", - "password": true, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "str", - "value": "WATSONX_API_KEY" - }, - "code": { - "advanced": true, - "dynamic": true, - "fileTypes": [], - "file_path": "", - "info": "", - "list": false, - "load_from_db": false, - "multiline": true, - "name": "code", - "password": false, - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "type": "code", - "value": "import json\nfrom typing import Any\n\nimport requests\nfrom langchain_ibm import ChatWatsonx\nfrom pydantic.v1 import SecretStr\n\nfrom langflow.base.models.model import LCModelComponent\nfrom langflow.field_typing import LanguageModel\nfrom langflow.field_typing.range_spec import RangeSpec\nfrom langflow.inputs.inputs import BoolInput, DropdownInput, IntInput, SecretStrInput, SliderInput, StrInput\nfrom langflow.logging.logger import logger\nfrom langflow.schema.dotdict import dotdict\n\n\nclass WatsonxAIComponent(LCModelComponent):\n display_name = \"IBM watsonx.ai\"\n description = \"Generate text using IBM watsonx.ai foundation models.\"\n icon = \"WatsonxAI\"\n name = \"IBMwatsonxModel\"\n beta = False\n\n _default_models = [\"ibm/granite-3-2b-instruct\", \"ibm/granite-3-8b-instruct\", \"ibm/granite-13b-instruct-v2\"]\n\n inputs = [\n *LCModelComponent._base_inputs,\n DropdownInput(\n name=\"url\",\n display_name=\"watsonx API Endpoint\",\n info=\"The base URL of the API.\",\n value=None,\n options=[\n \"https://us-south.ml.cloud.ibm.com\",\n \"https://eu-de.ml.cloud.ibm.com\",\n \"https://eu-gb.ml.cloud.ibm.com\",\n \"https://au-syd.ml.cloud.ibm.com\",\n \"https://jp-tok.ml.cloud.ibm.com\",\n \"https://ca-tor.ml.cloud.ibm.com\",\n ],\n real_time_refresh=True,\n ),\n StrInput(\n name=\"project_id\",\n display_name=\"watsonx Project ID\",\n required=True,\n info=\"The project ID or deployment space ID that is associated with the foundation model.\",\n ),\n SecretStrInput(\n name=\"api_key\",\n display_name=\"API Key\",\n info=\"The API Key to use for the model.\",\n required=True,\n ),\n DropdownInput(\n name=\"model_name\",\n display_name=\"Model Name\",\n options=[],\n value=None,\n dynamic=True,\n required=True,\n ),\n IntInput(\n name=\"max_tokens\",\n display_name=\"Max Tokens\",\n advanced=True,\n info=\"The maximum number of tokens to generate.\",\n range_spec=RangeSpec(min=1, max=4096),\n value=1000,\n ),\n StrInput(\n name=\"stop_sequence\",\n display_name=\"Stop Sequence\",\n advanced=True,\n info=\"Sequence where generation should stop.\",\n field_type=\"str\",\n ),\n SliderInput(\n name=\"temperature\",\n display_name=\"Temperature\",\n info=\"Controls randomness, higher values increase diversity.\",\n value=0.1,\n range_spec=RangeSpec(min=0, max=2, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"top_p\",\n display_name=\"Top P\",\n info=\"The cumulative probability cutoff for token selection. 
\"\n \"Lower values mean sampling from a smaller, more top-weighted nucleus.\",\n value=0.9,\n range_spec=RangeSpec(min=0, max=1, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"frequency_penalty\",\n display_name=\"Frequency Penalty\",\n info=\"Penalty for frequency of token usage.\",\n value=0.5,\n range_spec=RangeSpec(min=-2.0, max=2.0, step=0.01),\n advanced=True,\n ),\n SliderInput(\n name=\"presence_penalty\",\n display_name=\"Presence Penalty\",\n info=\"Penalty for token presence in prior text.\",\n value=0.3,\n range_spec=RangeSpec(min=-2.0, max=2.0, step=0.01),\n advanced=True,\n ),\n IntInput(\n name=\"seed\",\n display_name=\"Random Seed\",\n advanced=True,\n info=\"The random seed for the model.\",\n value=8,\n ),\n BoolInput(\n name=\"logprobs\",\n display_name=\"Log Probabilities\",\n advanced=True,\n info=\"Whether to return log probabilities of the output tokens.\",\n value=True,\n ),\n IntInput(\n name=\"top_logprobs\",\n display_name=\"Top Log Probabilities\",\n advanced=True,\n info=\"Number of most likely tokens to return at each position.\",\n value=3,\n range_spec=RangeSpec(min=1, max=20),\n ),\n StrInput(\n name=\"logit_bias\",\n display_name=\"Logit Bias\",\n advanced=True,\n info='JSON string of token IDs to bias or suppress (e.g., {\"1003\": -100, \"1004\": 100}).',\n field_type=\"str\",\n ),\n ]\n\n @staticmethod\n def fetch_models(base_url: str) -> list[str]:\n \"\"\"Fetch available models from the watsonx.ai API.\"\"\"\n try:\n endpoint = f\"{base_url}/ml/v1/foundation_model_specs\"\n params = {\"version\": \"2024-09-16\", \"filters\": \"function_text_chat,!lifecycle_withdrawn\"}\n response = requests.get(endpoint, params=params, timeout=10)\n response.raise_for_status()\n data = response.json()\n models = [model[\"model_id\"] for model in data.get(\"resources\", [])]\n return sorted(models)\n except Exception: # noqa: BLE001\n logger.exception(\"Error fetching models. Using default models.\")\n return WatsonxAIComponent._default_models\n\n def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):\n \"\"\"Update model options when URL or API key changes.\"\"\"\n logger.info(\"Updating build config. Field name: %s, Field value: %s\", field_name, field_value)\n\n if field_name == \"url\" and field_value:\n try:\n models = self.fetch_models(base_url=build_config.url.value)\n build_config.model_name.options = models\n if build_config.model_name.value:\n build_config.model_name.value = models[0]\n info_message = f\"Updated model options: {len(models)} models found in {build_config.url.value}\"\n logger.info(info_message)\n except Exception: # noqa: BLE001\n logger.exception(\"Error updating model options.\")\n\n def build_model(self) -> LanguageModel:\n # Parse logit_bias from JSON string if provided\n logit_bias = None\n if hasattr(self, \"logit_bias\") and self.logit_bias:\n try:\n logit_bias = json.loads(self.logit_bias)\n except json.JSONDecodeError:\n logger.warning(\"Invalid logit_bias JSON format. 
Using default instead.\")\n logit_bias = {\"1003\": -100, \"1004\": -100}\n\n chat_params = {\n \"max_tokens\": getattr(self, \"max_tokens\", None),\n \"temperature\": getattr(self, \"temperature\", None),\n \"top_p\": getattr(self, \"top_p\", None),\n \"frequency_penalty\": getattr(self, \"frequency_penalty\", None),\n \"presence_penalty\": getattr(self, \"presence_penalty\", None),\n \"seed\": getattr(self, \"seed\", None),\n \"stop\": [self.stop_sequence] if self.stop_sequence else [],\n \"n\": 1,\n \"logprobs\": getattr(self, \"logprobs\", True),\n \"top_logprobs\": getattr(self, \"top_logprobs\", None),\n \"time_limit\": 600000,\n \"logit_bias\": logit_bias,\n }\n\n return ChatWatsonx(\n apikey=SecretStr(self.api_key).get_secret_value(),\n url=self.url,\n project_id=self.project_id,\n model_id=self.model_name,\n params=chat_params,\n streaming=self.stream,\n )\n" - }, - "frequency_penalty": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Frequency Penalty", - "dynamic": false, - "info": "Penalty for frequency of token usage.", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "frequency_penalty", - "placeholder": "", - "range_spec": { - "max": 2, - "min": -2, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.5 - }, - "input_value": { - "_input_type": "MessageInput", - "advanced": false, - "display_name": "Input", - "dynamic": false, - "info": "", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "input_value", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "logit_bias": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Logit Bias", - "dynamic": false, - "info": "JSON string of token IDs to bias or suppress (e.g., {\"1003\": -100, \"1004\": 100}).", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "logit_bias", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "logprobs": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Log Probabilities", - "dynamic": false, - "info": "Whether to return log probabilities of the output tokens.", - "list": false, - "list_add_label": "Add More", - "name": "logprobs", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": true - }, - "max_tokens": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Max Tokens", - "dynamic": false, - "info": "The maximum number of tokens to generate.", - "list": false, - "list_add_label": "Add More", - "name": "max_tokens", - "placeholder": "", - "range_spec": { - "max": 4096, - "min": 1, - "step": 0.1, - "step_type": "float" - }, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 1000 - }, - "model_name": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "Model Name", - 
"dynamic": true, - "info": "", - "name": "model_name", - "options": [ - "ibm/granite-3-2-8b-instruct", - "ibm/granite-3-2b-instruct", - "ibm/granite-3-3-8b-instruct", - "ibm/granite-3-8b-instruct", - "ibm/granite-guardian-3-2b", - "ibm/granite-guardian-3-8b", - "ibm/granite-vision-3-2-2b", - "meta-llama/llama-3-2-11b-vision-instruct", - "meta-llama/llama-3-2-90b-vision-instruct", - "meta-llama/llama-3-3-70b-instruct", - "meta-llama/llama-3-405b-instruct", - "meta-llama/llama-4-maverick-17b-128e-instruct-fp8", - "meta-llama/llama-guard-3-11b-vision", - "mistralai/mistral-large", - "mistralai/mistral-medium-2505", - "mistralai/mistral-small-3-1-24b-instruct-2503", - "mistralai/pixtral-12b", - "openai/gpt-oss-120b" - ], - "options_metadata": [], - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "ibm/granite-3-2-8b-instruct" - }, - "presence_penalty": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Presence Penalty", - "dynamic": false, - "info": "Penalty for token presence in prior text.", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "presence_penalty", - "placeholder": "", - "range_spec": { - "max": 2, - "min": -2, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.3 - }, - "project_id": { - "_input_type": "StrInput", - "advanced": false, - "display_name": "watsonx Project ID", - "dynamic": false, - "info": "The project ID or deployment space ID that is associated with the foundation model.", - "list": false, - "list_add_label": "Add More", - "load_from_db": true, - "name": "project_id", - "placeholder": "", - "required": true, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "WATSONX_PROJECT_ID" - }, - "seed": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Random Seed", - "dynamic": false, - "info": "The random seed for the model.", - "list": false, - "list_add_label": "Add More", - "name": "seed", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 8 - }, - "stop_sequence": { - "_input_type": "StrInput", - "advanced": true, - "display_name": "Stop Sequence", - "dynamic": false, - "info": "Sequence where generation should stop.", - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "name": "stop_sequence", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "stream": { - "_input_type": "BoolInput", - "advanced": true, - "display_name": "Stream", - "dynamic": false, - "info": "Stream the response from the model. 
Streaming works only in Chat.", - "list": false, - "list_add_label": "Add More", - "name": "stream", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "bool", - "value": false - }, - "system_message": { - "_input_type": "MultilineInput", - "advanced": false, - "copy_field": false, - "display_name": "System Message", - "dynamic": false, - "info": "System message to pass to the model.", - "input_types": [ - "Message" - ], - "list": false, - "list_add_label": "Add More", - "load_from_db": false, - "multiline": true, - "name": "system_message", - "placeholder": "", - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_input": true, - "trace_as_metadata": true, - "type": "str", - "value": "" - }, - "temperature": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Temperature", - "dynamic": false, - "info": "Controls randomness, higher values increase diversity.", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "temperature", - "placeholder": "", - "range_spec": { - "max": 2, - "min": 0, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.1 - }, - "top_logprobs": { - "_input_type": "IntInput", - "advanced": true, - "display_name": "Top Log Probabilities", - "dynamic": false, - "info": "Number of most likely tokens to return at each position.", - "list": false, - "list_add_label": "Add More", - "name": "top_logprobs", - "placeholder": "", - "range_spec": { - "max": 20, - "min": 1, - "step": 0.1, - "step_type": "float" - }, - "required": false, - "show": true, - "title_case": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "int", - "value": 3 - }, - "top_p": { - "_input_type": "SliderInput", - "advanced": true, - "display_name": "Top P", - "dynamic": false, - "info": "The cumulative probability cutoff for token selection. 
Lower values mean sampling from a smaller, more top-weighted nucleus.", - "max_label": "", - "max_label_icon": "", - "min_label": "", - "min_label_icon": "", - "name": "top_p", - "placeholder": "", - "range_spec": { - "max": 1, - "min": 0, - "step": 0.01, - "step_type": "float" - }, - "required": false, - "show": true, - "slider_buttons": false, - "slider_buttons_options": [], - "slider_input": false, - "title_case": false, - "tool_mode": false, - "type": "slider", - "value": 0.9 - }, - "url": { - "_input_type": "DropdownInput", - "advanced": false, - "combobox": false, - "dialog_inputs": {}, - "display_name": "watsonx API Endpoint", - "dynamic": false, - "info": "The base URL of the API.", - "name": "url", - "options": [ - "https://us-south.ml.cloud.ibm.com", - "https://eu-de.ml.cloud.ibm.com", - "https://eu-gb.ml.cloud.ibm.com", - "https://au-syd.ml.cloud.ibm.com", - "https://jp-tok.ml.cloud.ibm.com", - "https://ca-tor.ml.cloud.ibm.com" - ], - "options_metadata": [], - "placeholder": "", - "real_time_refresh": true, - "required": false, - "show": true, - "title_case": false, - "toggle": false, - "tool_mode": false, - "trace_as_metadata": true, - "type": "str", - "value": "https://us-south.ml.cloud.ibm.com" - } + "id": "IBMwatsonxModel-qXZxc", + "position": { + "x": 0, + "y": 0 }, - "tool_mode": false - }, - "selected_output": "text_output", - "showNode": true, - "type": "IBMwatsonxModel" - }, - "dragging": false, - "id": "IBMwatsonxModel-18kmA", - "measured": { - "height": 632, - "width": 320 - }, - "position": { - "x": 370.8989669694083, - "y": 184 - }, - "selected": true, - "type": "genericNode" - } \ No newline at end of file + "type": "genericNode" + } + ], + "viewport": { + "x": 1, + "y": 1, + "zoom": 1 + } + }, + "description": "Generate text using IBM watsonx.ai foundation models.", + "name": "IBM watsonx.ai", + "id": "IBMwatsonxModel-qXZxc", + "is_component": true, + "last_tested_version": "1.6.0" +} \ No newline at end of file diff --git a/flows/openrag_ingest_docling.json b/flows/openrag_ingest_docling.json index cce73398..f0e8b164 100644 --- a/flows/openrag_ingest_docling.json +++ b/flows/openrag_ingest_docling.json @@ -116,6 +116,143 @@ "sourceHandle": "{œdataTypeœ:œEmbeddingModelœ,œidœ:œEmbeddingModel-eZ6bTœ,œnameœ:œembeddingsœ,œoutput_typesœ:[œEmbeddingsœ]}", "target": "OpenSearchHybrid-XtKoA", "targetHandle": "{œfieldNameœ:œembeddingœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œEmbeddingsœ],œtypeœ:œotherœ}" + }, + { + "animated": false, + "data": { + "sourceHandle": { + "dataType": "SecretInput", + "id": "SecretInput-GddHQ", + "name": "text", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "dynamic_connector_type", + "id": "AdvancedDynamicFormBuilder-rDFKw", + "inputTypes": [ + "Text", + "Message" + ], + "type": "str" + } + }, + "id": "reactflow__edge-SecretInput-GddHQ{œdataTypeœ:œSecretInputœ,œidœ:œSecretInput-GddHQœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-AdvancedDynamicFormBuilder-rDFKw{œfieldNameœ:œdynamic_connector_typeœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œinputTypesœ:[œTextœ,œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "SecretInput-GddHQ", + "sourceHandle": "{œdataTypeœ:œSecretInputœ,œidœ:œSecretInput-GddHQœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}", + "target": "AdvancedDynamicFormBuilder-rDFKw", + "targetHandle": "{œfieldNameœ:œdynamic_connector_typeœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œinputTypesœ:[œTextœ,œMessageœ],œtypeœ:œstrœ}" + }, + { + "animated": false, + "data": { + "sourceHandle": { + "dataType": 
"SecretInput", + "id": "SecretInput-8QSeL", + "name": "text", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "dynamic_owner", + "id": "AdvancedDynamicFormBuilder-rDFKw", + "inputTypes": [ + "Text", + "Message" + ], + "type": "str" + } + }, + "id": "reactflow__edge-SecretInput-8QSeL{œdataTypeœ:œSecretInputœ,œidœ:œSecretInput-8QSeLœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-AdvancedDynamicFormBuilder-rDFKw{œfieldNameœ:œdynamic_ownerœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œinputTypesœ:[œTextœ,œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "SecretInput-8QSeL", + "sourceHandle": "{œdataTypeœ:œSecretInputœ,œidœ:œSecretInput-8QSeLœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}", + "target": "AdvancedDynamicFormBuilder-rDFKw", + "targetHandle": "{œfieldNameœ:œdynamic_ownerœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œinputTypesœ:[œTextœ,œMessageœ],œtypeœ:œstrœ}" + }, + { + "animated": false, + "data": { + "sourceHandle": { + "dataType": "SecretInput", + "id": "SecretInput-qdu4S", + "name": "text", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "dynamic_owner_email", + "id": "AdvancedDynamicFormBuilder-rDFKw", + "inputTypes": [ + "Text", + "Message" + ], + "type": "str" + } + }, + "id": "reactflow__edge-SecretInput-qdu4S{œdataTypeœ:œSecretInputœ,œidœ:œSecretInput-qdu4Sœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-AdvancedDynamicFormBuilder-rDFKw{œfieldNameœ:œdynamic_owner_emailœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œinputTypesœ:[œTextœ,œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "SecretInput-qdu4S", + "sourceHandle": "{œdataTypeœ:œSecretInputœ,œidœ:œSecretInput-qdu4Sœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}", + "target": "AdvancedDynamicFormBuilder-rDFKw", + "targetHandle": "{œfieldNameœ:œdynamic_owner_emailœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œinputTypesœ:[œTextœ,œMessageœ],œtypeœ:œstrœ}" + }, + { + "animated": false, + "data": { + "sourceHandle": { + "dataType": "SecretInput", + "id": "SecretInput-p9iHD", + "name": "text", + "output_types": [ + "Message" + ] + }, + "targetHandle": { + "fieldName": "dynamic_owner_name", + "id": "AdvancedDynamicFormBuilder-rDFKw", + "inputTypes": [ + "Text", + "Message" + ], + "type": "str" + } + }, + "id": "reactflow__edge-SecretInput-p9iHD{œdataTypeœ:œSecretInputœ,œidœ:œSecretInput-p9iHDœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}-AdvancedDynamicFormBuilder-rDFKw{œfieldNameœ:œdynamic_owner_nameœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œinputTypesœ:[œTextœ,œMessageœ],œtypeœ:œstrœ}", + "selected": false, + "source": "SecretInput-p9iHD", + "sourceHandle": "{œdataTypeœ:œSecretInputœ,œidœ:œSecretInput-p9iHDœ,œnameœ:œtextœ,œoutput_typesœ:[œMessageœ]}", + "target": "AdvancedDynamicFormBuilder-rDFKw", + "targetHandle": "{œfieldNameœ:œdynamic_owner_nameœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œinputTypesœ:[œTextœ,œMessageœ],œtypeœ:œstrœ}" + }, + { + "data": { + "sourceHandle": { + "dataType": "AdvancedDynamicFormBuilder", + "id": "AdvancedDynamicFormBuilder-rDFKw", + "name": "form_data", + "output_types": [ + "Data" + ] + }, + "targetHandle": { + "fieldName": "docs_metadata", + "id": "OpenSearchHybrid-XtKoA", + "inputTypes": [ + "Data" + ], + "type": "table" + } + }, + "id": "xy-edge__AdvancedDynamicFormBuilder-rDFKw{œdataTypeœ:œAdvancedDynamicFormBuilderœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œnameœ:œform_dataœ,œoutput_typesœ:[œDataœ]}-OpenSearchHybrid-XtKoA{œfieldNameœ:œdocs_metadataœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œDataœ],œtypeœ:œtableœ}", + "source": 
"AdvancedDynamicFormBuilder-rDFKw", + "sourceHandle": "{œdataTypeœ:œAdvancedDynamicFormBuilderœ,œidœ:œAdvancedDynamicFormBuilder-rDFKwœ,œnameœ:œform_dataœ,œoutput_typesœ:[œDataœ]}", + "target": "OpenSearchHybrid-XtKoA", + "targetHandle": "{œfieldNameœ:œdocs_metadataœ,œidœ:œOpenSearchHybrid-XtKoAœ,œinputTypesœ:[œDataœ],œtypeœ:œtableœ}" } ], "nodes": [ @@ -373,8 +510,8 @@ "beta": false, "conditional_paths": [], "custom_fields": {}, - "description": "Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.", - "display_name": "OpenSearch (Hybrid)", + "description": "Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.", + "display_name": "OpenSearch", "documentation": "", "edited": true, "field_order": [ @@ -405,21 +542,21 @@ "icon": "OpenSearch", "legacy": false, "metadata": { - "code_hash": "deee3f04cb47", + "code_hash": "c81b23acb81a", "dependencies": { "dependencies": [ - { - "name": "langflow", - "version": "1.5.0.post2" - }, { "name": "opensearchpy", "version": "2.8.0" + }, + { + "name": "lfx", + "version": null } ], "total_dependencies": 2 }, - "module": "custom_components.opensearch_hybrid" + "module": "custom_components.opensearch" }, "minimized": false, "output_types": [], @@ -463,7 +600,7 @@ "cache": true, "display_name": "Vector Store Connection", "group_outputs": false, - "hidden": true, + "hidden": false, "method": "as_vector_store", "name": "vectorstoreconnection", "options": null, @@ -484,9 +621,10 @@ "advanced": false, "combobox": false, "dialog_inputs": {}, - "display_name": "Auth Mode", + "display_name": "Authentication Mode", "dynamic": false, - "info": "Choose Basic (username/password) or JWT (Bearer token).", + "external_options": {}, + "info": "Authentication method: 'basic' for username/password authentication, or 'jwt' for JSON Web Token (Bearer) authentication.", "load_from_db": false, "name": "auth_mode", "options": [ @@ -539,14 +677,17 @@ "show": true, "title_case": false, "type": "code", - "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any, Dict, List, Optional\n\nfrom langflow.base.vectorstores.model import (\n LCVectorStoreComponent,\n check_cached_vector_store,\n)\nfrom langflow.base.vectorstores.vector_store_connection_decorator import (\n vector_store_connection,\n)\nfrom langflow.io import (\n BoolInput,\n DropdownInput,\n HandleInput,\n IntInput,\n MultilineInput,\n SecretStrInput,\n StrInput,\n TableInput,\n)\nfrom langflow.logging import logger\nfrom langflow.schema.data import Data\nfrom opensearchpy import OpenSearch, helpers\n\n\n@vector_store_connection\nclass OpenSearchHybridComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch hybrid search: KNN (k=10, boost=0.7) + multi_match (boost=0.3) with optional filters & min_score.\"\"\"\n\n display_name: str = \"OpenSearch (Hybrid)\"\n name: str = \"OpenSearchHybrid\"\n icon: str = \"OpenSearch\"\n description: str = \"Hybrid search: KNN + keyword, with optional filters, min_score, and aggregations.\"\n\n # Keys we consider baseline\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[\n i.name for i in LCVectorStoreComponent.inputs\n ], # search_query, add_documents, etc.\n \"embedding\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"filter_expression\",\n \"engine\",\n \"space_type\",\n \"ef_construction\",\n \"m\",\n 
\"docs_metadata\",\n ]\n\n inputs = [\n TableInput(\n name=\"docs_metadata\",\n display_name=\"Ingestion Metadata\",\n info=\"Key value pairs to be inserted into each ingested document.\",\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Key name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Value of the metadata\",\n },\n ],\n value=[],\n advanced=True,\n ),\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=\"URL for your OpenSearch cluster.\",\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=\"The index to search.\",\n ),\n DropdownInput(\n name=\"engine\",\n display_name=\"Engine\",\n options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n value=\"jvector\",\n info=\"Vector search engine to use.\",\n advanced=True,\n ),\n DropdownInput(\n name=\"space_type\",\n display_name=\"Space Type\",\n options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n value=\"l2\",\n info=\"Distance metric for vector similarity.\",\n advanced=True,\n ),\n IntInput(\n name=\"ef_construction\",\n display_name=\"EF Construction\",\n value=512,\n info=\"Size of the dynamic list used during k-NN graph creation.\",\n advanced=True,\n ),\n IntInput(\n name=\"m\",\n display_name=\"M Parameter\",\n value=16,\n info=\"Number of bidirectional links created for each new element.\",\n advanced=True,\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(\n name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]\n ),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Vector field used for KNN.\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Default Size (limit)\",\n value=10,\n advanced=True,\n info=\"Default number of hits when no limit provided in filter_expression.\",\n ),\n MultilineInput(\n name=\"filter_expression\",\n display_name=\"Filter Expression (JSON)\",\n value=\"\",\n info=(\n \"Optional JSON to control filters/limit/score threshold.\\n\"\n \"Accepted shapes:\\n\"\n '1) {\"filter\": [ {\"term\": {\"filename\":\"foo\"}}, {\"terms\":{\"owner\":[\"u1\",\"u2\"]}} ], \"limit\": 10, \"score_threshold\": 1.6 }\\n'\n '2) Context-style maps: {\"data_sources\":[\"fileA\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"123\"]}\\n'\n \"Placeholders with __IMPOSSIBLE_VALUE__ are ignored.\"\n ),\n ),\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Auth Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=\"Choose Basic (username/password) or JWT (Bearer token).\",\n real_time_refresh=True,\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"Password\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"JWT\",\n load_from_db=True,\n show=True,\n info=\"Paste a valid JWT (sent as a header).\",\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False,\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False,\n advanced=True,\n 
),\n # ----- TLS -----\n BoolInput(name=\"use_ssl\", display_name=\"Use SSL\", value=True, advanced=True),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify Certificates\",\n value=False,\n advanced=True,\n ),\n ]\n\n # ---------- helper functions for index management ----------\n def _default_text_mapping(\n self,\n dim: int,\n engine: str = \"jvector\",\n space_type: str = \"l2\",\n ef_search: int = 512,\n ef_construction: int = 100,\n m: int = 16,\n vector_field: str = \"vector_field\",\n ) -> Dict[str, Any]:\n \"\"\"For Approximate k-NN Search, this is the default mapping to create index.\"\"\"\n return {\n \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n \"mappings\": {\n \"properties\": {\n vector_field: {\n \"type\": \"knn_vector\",\n \"dimension\": dim,\n \"method\": {\n \"name\": \"disk_ann\",\n \"space_type\": space_type,\n \"engine\": engine,\n \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n },\n }\n }\n },\n }\n\n def _validate_aoss_with_engines(self, is_aoss: bool, engine: str) -> None:\n \"\"\"Validate AOSS with the engine.\"\"\"\n if is_aoss and engine != \"nmslib\" and engine != \"faiss\":\n raise ValueError(\n \"Amazon OpenSearch Service Serverless only \"\n \"supports `nmslib` or `faiss` engines\"\n )\n\n def _is_aoss_enabled(self, http_auth: Any) -> bool:\n \"\"\"Check if the service is http_auth is set as `aoss`.\"\"\"\n if (\n http_auth is not None\n and hasattr(http_auth, \"service\")\n and http_auth.service == \"aoss\"\n ):\n return True\n return False\n\n def _bulk_ingest_embeddings(\n self,\n client: OpenSearch,\n index_name: str,\n embeddings: List[List[float]],\n texts: List[str],\n metadatas: Optional[List[dict]] = None,\n ids: Optional[List[str]] = None,\n vector_field: str = \"vector_field\",\n text_field: str = \"text\",\n mapping: Optional[Dict] = None,\n max_chunk_bytes: Optional[int] = 1 * 1024 * 1024,\n is_aoss: bool = False,\n ) -> List[str]:\n \"\"\"Bulk Ingest Embeddings into given index.\"\"\"\n if not mapping:\n mapping = dict()\n\n requests = []\n return_ids = []\n\n for i, text in enumerate(texts):\n metadata = metadatas[i] if metadatas else {}\n _id = ids[i] if ids else str(uuid.uuid4())\n request = {\n \"_op_type\": \"index\",\n \"_index\": index_name,\n vector_field: embeddings[i],\n text_field: text,\n **metadata,\n }\n if is_aoss:\n request[\"id\"] = _id\n else:\n request[\"_id\"] = _id\n requests.append(request)\n return_ids.append(_id)\n self.log(metadatas[i])\n helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n return return_ids\n\n # ---------- auth / client ----------\n def _build_auth_kwargs(self) -> Dict[str, Any]:\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n raise ValueError(\"Auth Mode is 'jwt' but no jwt_token was provided.\")\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n user = (self.username or \"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n raise ValueError(\"Auth Mode is 'basic' but username/password are missing.\")\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n 
ssl_show_warn=False,\n **auth_kwargs,\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # Return raw OpenSearch client as our “vector store.”\n self.log(self.ingest_data)\n client = self.build_client()\n self._add_documents_to_vector_store(client=client)\n return client\n\n # ---------- ingest ----------\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n # Convert DataFrame to Data if needed using parent's method\n self.ingest_data = self._prepare_ingest_data()\n\n docs = self.ingest_data or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n # Extract texts and metadata from documents\n texts = []\n metadatas = []\n # Process docs_metadata table input into a dict\n additional_metadata = {}\n if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n for item in self.docs_metadata:\n if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n additional_metadata[item[\"key\"]] = item[\"value\"]\n\n for doc_obj in docs:\n data_copy = json.loads(doc_obj.model_dump_json())\n text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n texts.append(text)\n\n # Merge additional metadata from table input\n data_copy.update(additional_metadata)\n\n metadatas.append(data_copy)\n self.log(metadatas)\n if not self.embedding:\n raise ValueError(\"Embedding handle is required to embed documents.\")\n\n # Generate embeddings\n vectors = self.embedding.embed_documents(texts)\n\n if not vectors:\n self.log(\"No vectors generated from documents.\")\n return\n\n # Get vector dimension for mapping\n dim = len(vectors[0]) if vectors else 768 # default fallback\n\n # Check for AOSS\n auth_kwargs = self._build_auth_kwargs()\n is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n # Validate engine with AOSS\n engine = getattr(self, \"engine\", \"jvector\")\n self._validate_aoss_with_engines(is_aoss, engine)\n\n # Create mapping with proper KNN settings\n space_type = getattr(self, \"space_type\", \"l2\")\n ef_construction = getattr(self, \"ef_construction\", 512)\n m = getattr(self, \"m\", 16)\n\n mapping = self._default_text_mapping(\n dim=dim,\n engine=engine,\n space_type=space_type,\n ef_construction=ef_construction,\n m=m,\n vector_field=self.vector_field,\n )\n\n self.log(\n f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\"\n )\n\n # Use the LangChain-style bulk ingestion\n return_ids = self._bulk_ingest_embeddings(\n client=client,\n index_name=self.index_name,\n embeddings=vectors,\n texts=texts,\n metadatas=metadatas,\n vector_field=self.vector_field,\n text_field=\"text\",\n mapping=mapping,\n is_aoss=is_aoss,\n )\n self.log(metadatas)\n\n self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n # ---------- helpers for filters ----------\n def _is_placeholder_term(self, term_obj: dict) -> bool:\n # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n def _coerce_filter_clauses(self, filter_obj: dict | None) -> List[dict]:\n \"\"\"\n Accepts either:\n A) {\"filter\":[ ...term/terms objects... 
], \"limit\":..., \"score_threshold\":...}\n B) Context-style: {\"data_sources\":[...], \"document_types\":[...], \"owners\":[...]}\n Returns a list of OS filter clauses (term/terms), skipping placeholders and empty terms.\n \"\"\"\n\n if not filter_obj:\n return []\n\n # If it’s a string, try to parse it once\n if isinstance(filter_obj, str):\n try:\n filter_obj = json.loads(filter_obj)\n except Exception:\n # Not valid JSON → treat as no filters\n return []\n\n # Case A: already an explicit list/dict under \"filter\"\n if \"filter\" in filter_obj:\n raw = filter_obj[\"filter\"]\n if isinstance(raw, dict):\n raw = [raw]\n clauses: List[dict] = []\n for f in raw or []:\n if (\n \"term\" in f\n and isinstance(f[\"term\"], dict)\n and not self._is_placeholder_term(f[\"term\"])\n ):\n clauses.append(f)\n elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n field, vals = next(iter(f[\"terms\"].items()))\n if isinstance(vals, list) and len(vals) > 0:\n clauses.append(f)\n return clauses\n\n # Case B: convert context-style maps into clauses\n field_mapping = {\n \"data_sources\": \"filename\",\n \"document_types\": \"mimetype\",\n \"owners\": \"owner\",\n }\n clauses: List[dict] = []\n for k, values in filter_obj.items():\n if not isinstance(values, list):\n continue\n field = field_mapping.get(k, k)\n if len(values) == 0:\n # Match-nothing placeholder (kept to mirror your tool semantics)\n clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n elif len(values) == 1:\n if values[0] != \"__IMPOSSIBLE_VALUE__\":\n clauses.append({\"term\": {field: values[0]}})\n else:\n clauses.append({\"terms\": {field: values}})\n return clauses\n\n # ---------- search (single hybrid path matching your tool) ----------\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n logger.info(self.ingest_data)\n client = self.build_client()\n q = (query or \"\").strip()\n\n # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n filter_obj = None\n if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n try:\n filter_obj = json.loads(self.filter_expression)\n except json.JSONDecodeError as e:\n raise ValueError(f\"Invalid filter_expression JSON: {e}\") from e\n\n if not self.embedding:\n raise ValueError(\n \"Embedding is required to run hybrid search (KNN + keyword).\"\n )\n\n # Embed the query\n vec = self.embedding.embed_query(q)\n\n # Build filter clauses (accept both shapes)\n clauses = self._coerce_filter_clauses(filter_obj)\n\n # Respect the tool's limit/threshold defaults\n limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n # Build the same hybrid body as your SearchService\n body = {\n \"query\": {\n \"bool\": {\n \"should\": [\n {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": 10, # fixed to match the tool\n \"boost\": 0.7,\n }\n }\n },\n {\n \"multi_match\": {\n \"query\": q,\n \"fields\": [\"text^2\", \"filename^1.5\"],\n \"type\": \"best_fields\",\n \"fuzziness\": \"AUTO\",\n \"boost\": 0.3,\n }\n },\n ],\n \"minimum_should_match\": 1,\n }\n },\n \"aggs\": {\n \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n },\n \"_source\": [\n \"filename\",\n \"mimetype\",\n \"page\",\n \"text\",\n \"source_url\",\n \"owner\",\n \"allowed_users\",\n 
\"allowed_groups\",\n ],\n \"size\": limit,\n }\n if clauses:\n body[\"query\"][\"bool\"][\"filter\"] = clauses\n\n if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n # top-level min_score (matches your tool)\n body[\"min_score\"] = score_threshold\n\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n def search_documents(self) -> list[Data]:\n try:\n raw = self.search(self.search_query or \"\")\n return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n self.log(self.ingest_data)\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling (auth switch) --------\n async def update_build_config(\n self, build_config: dict, field_value: str, field_name: str | None = None\n ) -> dict:\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n if is_basic:\n build_config[\"jwt_token\"][\"value\"] = \"\"\n\n return build_config\n\n return build_config\n\n except Exception as e:\n self.log(f\"update_build_config error: {e}\")\n return build_config\n" + "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n This component provides vector storage and retrieval using OpenSearch, combining semantic\n similarity search (KNN) with keyword-based search for optimal results. 
It supports document\n ingestion, vector embeddings, and advanced filtering with authentication options.\n\n Features:\n - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n - Hybrid search combining KNN vector similarity and keyword matching\n - Flexible authentication (Basic auth, JWT tokens)\n - Advanced filtering and aggregations\n - Metadata injection during document ingestion\n \"\"\"\n\n display_name: str = \"OpenSearch\"\n icon: str = \"OpenSearch\"\n description: str = (\n \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n )\n\n # Keys we consider baseline\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[i.name for i in LCVectorStoreComponent.inputs], # search_query, add_documents, etc.\n \"embedding\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"filter_expression\",\n \"engine\",\n \"space_type\",\n \"ef_construction\",\n \"m\",\n \"docs_metadata\",\n ]\n\n inputs = [\n TableInput(\n name=\"docs_metadata\",\n display_name=\"Document Metadata\",\n info=(\n \"Additional metadata key-value pairs to be added to all ingested documents. \"\n \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n ),\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Key name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Value of the metadata\",\n },\n ],\n value=[],\n # advanced=True,\n input_types=[\"Data\"]\n ),\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=(\n \"The connection URL for your OpenSearch cluster \"\n \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n ),\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=(\n \"The OpenSearch index name where documents will be stored and searched. \"\n \"Will be created automatically if it doesn't exist.\"\n ),\n ),\n DropdownInput(\n name=\"engine\",\n display_name=\"Vector Engine\",\n options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n value=\"jvector\",\n info=(\n \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n ),\n advanced=True,\n ),\n DropdownInput(\n name=\"space_type\",\n display_name=\"Distance Metric\",\n options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n value=\"l2\",\n info=(\n \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"ef_construction\",\n display_name=\"EF Construction\",\n value=512,\n info=(\n \"Size of the dynamic candidate list during index construction. \"\n \"Higher values improve recall but increase indexing time and memory usage.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"m\",\n display_name=\"M Parameter\",\n value=16,\n info=(\n \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n \"Higher values improve search quality but increase memory usage and indexing time.\"\n ),\n advanced=True,\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field Name\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Default Result Limit\",\n value=10,\n advanced=True,\n info=(\n \"Default maximum number of search results to return when no limit is \"\n \"specified in the filter expression.\"\n ),\n ),\n MultilineInput(\n name=\"filter_expression\",\n display_name=\"Search Filters (JSON)\",\n value=\"\",\n info=(\n \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n \"Format 1 - Explicit filters:\\n\"\n '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n \"Format 2 - Context-style mapping:\\n\"\n '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n ),\n ),\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Authentication Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=(\n \"Authentication method: 'basic' for username/password authentication, \"\n \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n ),\n real_time_refresh=True,\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"OpenSearch Password\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"JWT\",\n load_from_db=False,\n show=True,\n info=(\n \"Valid JSON Web Token for authentication. \"\n \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n ),\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False,\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False,\n advanced=True,\n ),\n # ----- TLS -----\n BoolInput(\n name=\"use_ssl\",\n display_name=\"Use SSL/TLS\",\n value=True,\n advanced=True,\n info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n ),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify SSL Certificates\",\n value=False,\n advanced=True,\n info=(\n \"Verify SSL certificates when connecting. 
\"\n \"Disable for self-signed certificates in development environments.\"\n ),\n ),\n ]\n\n # ---------- helper functions for index management ----------\n def _default_text_mapping(\n self,\n dim: int,\n engine: str = \"jvector\",\n space_type: str = \"l2\",\n ef_search: int = 512,\n ef_construction: int = 100,\n m: int = 16,\n vector_field: str = \"vector_field\",\n ) -> dict[str, Any]:\n \"\"\"Create the default OpenSearch index mapping for vector search.\n\n This method generates the index configuration with k-NN settings optimized\n for approximate nearest neighbor search using the specified vector engine.\n\n Args:\n dim: Dimensionality of the vector embeddings\n engine: Vector search engine (jvector, nmslib, faiss, lucene)\n space_type: Distance metric for similarity calculation\n ef_search: Size of dynamic list used during search\n ef_construction: Size of dynamic list used during index construction\n m: Number of bidirectional links for each vector\n vector_field: Name of the field storing vector embeddings\n\n Returns:\n Dictionary containing OpenSearch index mapping configuration\n \"\"\"\n return {\n \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n \"mappings\": {\n \"properties\": {\n vector_field: {\n \"type\": \"knn_vector\",\n \"dimension\": dim,\n \"method\": {\n \"name\": \"disk_ann\",\n \"space_type\": space_type,\n \"engine\": engine,\n \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n },\n }\n }\n },\n }\n\n def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n Amazon OpenSearch Serverless has restrictions on which vector engines\n can be used. This method ensures the selected engine is compatible.\n\n Args:\n is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n engine: The selected vector search engine\n\n Raises:\n ValueError: If AOSS is used with an incompatible engine\n \"\"\"\n if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n raise ValueError(msg)\n\n def _is_aoss_enabled(self, http_auth: Any) -> bool:\n \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n Args:\n http_auth: The HTTP authentication object\n\n Returns:\n True if AOSS is enabled, False otherwise\n \"\"\"\n return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n def _bulk_ingest_embeddings(\n self,\n client: OpenSearch,\n index_name: str,\n embeddings: list[list[float]],\n texts: list[str],\n metadatas: list[dict] | None = None,\n ids: list[str] | None = None,\n vector_field: str = \"vector_field\",\n text_field: str = \"text\",\n mapping: dict | None = None,\n max_chunk_bytes: int | None = 1 * 1024 * 1024,\n *,\n is_aoss: bool = False,\n ) -> list[str]:\n \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n This method uses bulk operations to insert documents with their vector\n embeddings and metadata into the specified OpenSearch index.\n\n Args:\n client: OpenSearch client instance\n index_name: Target index for document storage\n embeddings: List of vector embeddings for each document\n texts: List of document texts\n metadatas: Optional metadata dictionaries for each document\n ids: Optional document IDs (UUIDs generated if not provided)\n vector_field: Field name for storing vector embeddings\n text_field: Field name for 
storing document text\n mapping: Optional index mapping configuration\n max_chunk_bytes: Maximum size per bulk request chunk\n is_aoss: Whether using Amazon OpenSearch Serverless\n\n Returns:\n List of document IDs that were successfully ingested\n \"\"\"\n if not mapping:\n mapping = {}\n\n requests = []\n return_ids = []\n\n for i, text in enumerate(texts):\n metadata = metadatas[i] if metadatas else {}\n _id = ids[i] if ids else str(uuid.uuid4())\n request = {\n \"_op_type\": \"index\",\n \"_index\": index_name,\n vector_field: embeddings[i],\n text_field: text,\n **metadata,\n }\n if is_aoss:\n request[\"id\"] = _id\n else:\n request[\"_id\"] = _id\n requests.append(request)\n return_ids.append(_id)\n if metadatas:\n self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n return return_ids\n\n # ---------- auth / client ----------\n def _build_auth_kwargs(self) -> dict[str, Any]:\n \"\"\"Build authentication configuration for OpenSearch client.\n\n Constructs the appropriate authentication parameters based on the\n selected auth mode (basic username/password or JWT token).\n\n Returns:\n Dictionary containing authentication configuration\n\n Raises:\n ValueError: If required authentication parameters are missing\n \"\"\"\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n raise ValueError(msg)\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n user = (self.username or \"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n msg = \"Auth Mode is 'basic' but username/password are missing.\"\n raise ValueError(msg)\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n \"\"\"Create and configure an OpenSearch client instance.\n\n Returns:\n Configured OpenSearch client ready for operations\n \"\"\"\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n ssl_show_warn=False,\n **auth_kwargs,\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # Return raw OpenSearch client as our “vector store.”\n self.log(self.ingest_data)\n client = self.build_client()\n self._add_documents_to_vector_store(client=client)\n return client\n\n # ---------- ingest ----------\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n This method handles the complete document ingestion pipeline:\n - Prepares document data and metadata\n - Generates vector embeddings\n - Creates appropriate index mappings\n - Bulk inserts documents with vectors\n\n Args:\n client: OpenSearch client for performing operations\n \"\"\"\n # Convert DataFrame to Data if needed using parent's method\n self.ingest_data = self._prepare_ingest_data()\n\n docs = self.ingest_data or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n # Extract texts and metadata from documents\n texts = []\n metadatas = []\n # Process docs_metadata table input into a dict\n additional_metadata = {}\n if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n logger.info(f\"[LF] Docs 
metadata {self.docs_metadata}\")\n if isinstance(self.docs_metadata[-1], Data):\n logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n self.docs_metadata = self.docs_metadata[-1].data\n logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n additional_metadata.update(self.docs_metadata)\n else:\n for item in self.docs_metadata:\n if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n additional_metadata[item[\"key\"]] = item[\"value\"]\n # Replace string \"None\" values with actual None\n for key, value in additional_metadata.items():\n if value == \"None\":\n additional_metadata[key] = None\n logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n for doc_obj in docs:\n data_copy = json.loads(doc_obj.model_dump_json())\n text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n texts.append(text)\n\n # Merge additional metadata from table input\n data_copy.update(additional_metadata)\n\n metadatas.append(data_copy)\n self.log(metadatas)\n if not self.embedding:\n msg = \"Embedding handle is required to embed documents.\"\n raise ValueError(msg)\n\n # Generate embeddings\n vectors = self.embedding.embed_documents(texts)\n\n if not vectors:\n self.log(\"No vectors generated from documents.\")\n return\n\n # Get vector dimension for mapping\n dim = len(vectors[0]) if vectors else 768 # default fallback\n\n # Check for AOSS\n auth_kwargs = self._build_auth_kwargs()\n is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n # Validate engine with AOSS\n engine = getattr(self, \"engine\", \"jvector\")\n self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n # Create mapping with proper KNN settings\n space_type = getattr(self, \"space_type\", \"l2\")\n ef_construction = getattr(self, \"ef_construction\", 512)\n m = getattr(self, \"m\", 16)\n\n mapping = self._default_text_mapping(\n dim=dim,\n engine=engine,\n space_type=space_type,\n ef_construction=ef_construction,\n m=m,\n vector_field=self.vector_field,\n )\n\n self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n # Use the LangChain-style bulk ingestion\n return_ids = self._bulk_ingest_embeddings(\n client=client,\n index_name=self.index_name,\n embeddings=vectors,\n texts=texts,\n metadatas=metadatas,\n vector_field=self.vector_field,\n text_field=\"text\",\n mapping=mapping,\n is_aoss=is_aoss,\n )\n self.log(metadatas)\n\n self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n # ---------- helpers for filters ----------\n def _is_placeholder_term(self, term_obj: dict) -> bool:\n # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n This method accepts two filter formats and converts them to standardized\n OpenSearch query clauses:\n\n Format A - Explicit filters:\n {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n \"limit\": 10, \"score_threshold\": 1.5}\n\n Format B - Context-style mapping:\n {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n Args:\n filter_obj: Filter configuration dictionary or None\n\n Returns:\n List of OpenSearch filter clauses (term/terms objects)\n Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n \"\"\"\n if not 
filter_obj:\n return []\n\n # If it is a string, try to parse it once\n if isinstance(filter_obj, str):\n try:\n filter_obj = json.loads(filter_obj)\n except json.JSONDecodeError:\n # Not valid JSON - treat as no filters\n return []\n\n # Case A: already an explicit list/dict under \"filter\"\n if \"filter\" in filter_obj:\n raw = filter_obj[\"filter\"]\n if isinstance(raw, dict):\n raw = [raw]\n explicit_clauses: list[dict] = []\n for f in raw or []:\n if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n explicit_clauses.append(f)\n elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n field, vals = next(iter(f[\"terms\"].items()))\n if isinstance(vals, list) and len(vals) > 0:\n explicit_clauses.append(f)\n return explicit_clauses\n\n # Case B: convert context-style maps into clauses\n field_mapping = {\n \"data_sources\": \"filename\",\n \"document_types\": \"mimetype\",\n \"owners\": \"owner\",\n }\n context_clauses: list[dict] = []\n for k, values in filter_obj.items():\n if not isinstance(values, list):\n continue\n field = field_mapping.get(k, k)\n if len(values) == 0:\n # Match-nothing placeholder (kept to mirror your tool semantics)\n context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n elif len(values) == 1:\n if values[0] != \"__IMPOSSIBLE_VALUE__\":\n context_clauses.append({\"term\": {field: values[0]}})\n else:\n context_clauses.append({\"terms\": {field: values}})\n return context_clauses\n\n # ---------- search (single hybrid path matching your tool) ----------\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n This method executes a sophisticated search that combines:\n - K-nearest neighbor (KNN) vector similarity search (70% weight)\n - Multi-field keyword search with fuzzy matching (30% weight)\n - Optional filtering and score thresholds\n - Aggregations for faceted search results\n\n Args:\n query: Search query string (used for both vector embedding and keyword search)\n\n Returns:\n List of search results with page_content, metadata, and relevance scores\n\n Raises:\n ValueError: If embedding component is not provided or filter JSON is invalid\n \"\"\"\n logger.info(self.ingest_data)\n client = self.build_client()\n q = (query or \"\").strip()\n\n # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n filter_obj = None\n if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n try:\n filter_obj = json.loads(self.filter_expression)\n except json.JSONDecodeError as e:\n msg = f\"Invalid filter_expression JSON: {e}\"\n raise ValueError(msg) from e\n\n if not self.embedding:\n msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n raise ValueError(msg)\n\n # Embed the query\n vec = self.embedding.embed_query(q)\n\n # Build filter clauses (accept both shapes)\n filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n # Respect the tool's limit/threshold defaults\n limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n # Build the same hybrid body as your SearchService\n body = {\n \"query\": {\n \"bool\": {\n \"should\": [\n {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": 10, # fixed to match the tool\n \"boost\": 0.7,\n }\n }\n },\n {\n \"multi_match\": {\n \"query\": q,\n \"fields\": [\"text^2\", \"filename^1.5\"],\n 
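# NOTE (comment added for clarity): \"best_fields\" ranks a hit by its single best-matching field, and \"fuzziness\": \"AUTO\" lets the keyword clause tolerate small typos.\n 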
\"type\": \"best_fields\",\n \"fuzziness\": \"AUTO\",\n \"boost\": 0.3,\n }\n },\n ],\n \"minimum_should_match\": 1,\n }\n },\n \"aggs\": {\n \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n },\n \"_source\": [\n \"filename\",\n \"mimetype\",\n \"page\",\n \"text\",\n \"source_url\",\n \"owner\",\n \"allowed_users\",\n \"allowed_groups\",\n ],\n \"size\": limit,\n }\n if filter_clauses:\n body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n # top-level min_score (matches your tool)\n body[\"min_score\"] = score_threshold\n\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n def search_documents(self) -> list[Data]:\n \"\"\"Search documents and return results as Data objects.\n\n This is the main interface method that performs the search using the\n configured search_query and returns results in Langflow's Data format.\n\n Returns:\n List of Data objects containing search results with text and metadata\n\n Raises:\n Exception: If search operation fails\n \"\"\"\n try:\n raw = self.search(self.search_query or \"\")\n return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n self.log(self.ingest_data)\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling (auth switch) --------\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Dynamically update component configuration based on field changes.\n\n This method handles real-time UI updates, particularly for authentication\n mode changes that show/hide relevant input fields.\n\n Args:\n build_config: Current component configuration\n field_value: New value for the changed field\n field_name: Name of the field that changed\n\n Returns:\n Updated build configuration with appropriate field visibility\n \"\"\"\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n if is_basic:\n build_config[\"jwt_token\"][\"value\"] = \"\"\n\n return build_config\n\n except (KeyError, ValueError) as e:\n self.log(f\"update_build_config error: {e}\")\n\n return build_config\n" }, "docs_metadata": { "_input_type": "TableInput", - "advanced": true, - "display_name": "Ingestion Metadata", + "advanced": false, + "display_name": "Document Metadata", "dynamic": false, - "info": "Key value pairs to be inserted into each ingested document.", + "info": "Additional metadata key-value pairs to be added 
to all ingested documents. Useful for tagging documents with source information, categories, or other custom attributes.", + "input_types": [ + "Data" + ], "is_list": true, "list_add_label": "Add More", "name": "docs_metadata", @@ -554,36 +695,22 @@ "required": false, "show": true, "table_icon": "Table", - "table_schema": { - "columns": [ - { - "default": "None", - "description": "Key name", - "disable_edit": false, - "display_name": "Key", - "edit_mode": "popover", - "filterable": true, - "formatter": "text", - "hidden": false, - "name": "key", - "sortable": true, - "type": "str" - }, - { - "default": "None", - "description": "Value of the metadata", - "disable_edit": false, - "display_name": "Value", - "edit_mode": "popover", - "filterable": true, - "formatter": "text", - "hidden": false, - "name": "value", - "sortable": true, - "type": "str" - } - ] - }, + "table_schema": [ + { + "description": "Key name", + "display_name": "Key", + "formatter": "text", + "name": "key", + "type": "str" + }, + { + "description": "Value of the metadata", + "display_name": "Value", + "formatter": "text", + "name": "value", + "type": "str" + } + ], "title_case": false, "tool_mode": false, "trace_as_metadata": true, @@ -597,7 +724,7 @@ "advanced": true, "display_name": "EF Construction", "dynamic": false, - "info": "Size of the dynamic list used during k-NN graph creation.", + "info": "Size of the dynamic candidate list during index construction. Higher values improve recall but increase indexing time and memory usage.", "list": false, "list_add_label": "Add More", "name": "ef_construction", @@ -635,9 +762,10 @@ "advanced": true, "combobox": false, "dialog_inputs": {}, - "display_name": "Engine", + "display_name": "Vector Engine", "dynamic": false, - "info": "Vector search engine to use.", + "external_options": {}, + "info": "Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.", "load_from_db": false, "name": "engine", "options": [ @@ -661,9 +789,9 @@ "_input_type": "MultilineInput", "advanced": false, "copy_field": false, - "display_name": "Filter Expression (JSON)", + "display_name": "Search Filters (JSON)", "dynamic": false, - "info": "Optional JSON to control filters/limit/score threshold.\nAccepted shapes:\n1) {\"filter\": [ {\"term\": {\"filename\":\"foo\"}}, {\"terms\":{\"owner\":[\"u1\",\"u2\"]}} ], \"limit\": 10, \"score_threshold\": 1.6 }\n2) Context-style maps: {\"data_sources\":[\"fileA\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"123\"]}\nPlaceholders with __IMPOSSIBLE_VALUE__ are ignored.", + "info": "Optional JSON configuration for search filtering, result limits, and score thresholds.\n\nFormat 1 - Explicit filters:\n{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, {\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\n\nFormat 2 - Context-style mapping:\n{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\n\nUse __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.", "input_types": [ "Message" ], @@ -687,7 +815,7 @@ "advanced": false, "display_name": "Index Name", "dynamic": false, - "info": "The index to search.", + "info": "The OpenSearch index name where documents will be stored and searched. 
Will be created automatically if it doesn't exist.", "list": false, "list_add_label": "Add More", "load_from_db": false, @@ -746,9 +874,9 @@ "advanced": false, "display_name": "JWT Token", "dynamic": false, - "info": "Paste a valid JWT (sent as a header).", + "info": "Valid JSON Web Token for authentication. Will be sent in the Authorization header (with optional 'Bearer ' prefix).", "input_types": [], - "load_from_db": true, + "load_from_db": false, "name": "jwt_token", "password": true, "placeholder": "", @@ -763,7 +891,7 @@ "advanced": true, "display_name": "M Parameter", "dynamic": false, - "info": "Number of bidirectional links created for each new element.", + "info": "Number of bidirectional connections for each vector in the HNSW graph. Higher values improve search quality but increase memory usage and indexing time.", "list": false, "list_add_label": "Add More", "name": "m", @@ -779,9 +907,9 @@ "number_of_results": { "_input_type": "IntInput", "advanced": true, - "display_name": "Default Size (limit)", + "display_name": "Default Result Limit", "dynamic": false, - "info": "Default number of hits when no limit provided in filter_expression.", + "info": "Default maximum number of search results to return when no limit is specified in the filter expression.", "list": false, "list_add_label": "Add More", "load_from_db": false, @@ -800,7 +928,7 @@ "advanced": false, "display_name": "OpenSearch URL", "dynamic": false, - "info": "URL for your OpenSearch cluster.", + "info": "The connection URL for your OpenSearch cluster (e.g., http://localhost:9200 for local development or your cloud endpoint).", "list": false, "list_add_label": "Add More", "load_from_db": false, @@ -817,7 +945,7 @@ "password": { "_input_type": "SecretStrInput", "advanced": false, - "display_name": "Password", + "display_name": "OpenSearch Password", "dynamic": false, "info": "", "input_types": [], @@ -877,9 +1005,10 @@ "advanced": true, "combobox": false, "dialog_inputs": {}, - "display_name": "Space Type", + "display_name": "Distance Metric", "dynamic": false, - "info": "Distance metric for vector similarity.", + "external_options": {}, + "info": "Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, 'cosinesimil' for cosine similarity, 'innerproduct' for dot product.", "name": "space_type", "options": [ "l2", @@ -902,9 +1031,9 @@ "use_ssl": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Use SSL", + "display_name": "Use SSL/TLS", "dynamic": false, - "info": "", + "info": "Enable SSL/TLS encryption for secure connections to OpenSearch.", "list": false, "list_add_label": "Add More", "name": "use_ssl", @@ -939,9 +1068,9 @@ "vector_field": { "_input_type": "StrInput", "advanced": true, - "display_name": "Vector Field", + "display_name": "Vector Field Name", "dynamic": false, - "info": "Vector field used for KNN.", + "info": "Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.", "list": false, "list_add_label": "Add More", "load_from_db": false, @@ -958,9 +1087,9 @@ "verify_certs": { "_input_type": "BoolInput", "advanced": true, - "display_name": "Verify Certificates", + "display_name": "Verify SSL Certificates", "dynamic": false, - "info": "", + "info": "Verify SSL certificates when connecting. 
Disable for self-signed certificates in development environments.", "list": false, "list_add_label": "Add More", "name": "verify_certs", @@ -978,12 +1107,12 @@ }, "selected_output": "search_results", "showNode": true, - "type": "OpenSearchHybrid" + "type": "OpenSearchVectorStoreComponent" }, "dragging": false, "id": "OpenSearchHybrid-XtKoA", "measured": { - "height": 760, + "height": 822, "width": 320 }, "position": { @@ -1027,7 +1156,7 @@ "icon": "Docling", "legacy": false, "metadata": { - "code_hash": "880538860431", + "code_hash": "930312ffe40c", "dependencies": { "dependencies": [ { @@ -1043,7 +1172,7 @@ "version": "2.10.6" }, { - "name": "langflow", + "name": "lfx", "version": null } ], @@ -1128,7 +1257,7 @@ "show": true, "title_case": false, "type": "code", - "value": "import base64\nimport time\nfrom concurrent.futures import Future, ThreadPoolExecutor\nfrom pathlib import Path\nfrom typing import Any\n\nimport httpx\nfrom docling_core.types.doc import DoclingDocument\nfrom pydantic import ValidationError\n\nfrom langflow.base.data import BaseFileComponent\nfrom langflow.inputs import IntInput, NestedDictInput, StrInput\nfrom langflow.inputs.inputs import FloatInput\nfrom langflow.schema import Data\n\n\nclass DoclingRemoteComponent(BaseFileComponent):\n display_name = \"Docling Serve\"\n description = \"Uses Docling to process input documents connecting to your instance of Docling Serve.\"\n documentation = \"https://docling-project.github.io/docling/\"\n trace_type = \"tool\"\n icon = \"Docling\"\n name = \"DoclingRemote\"\n\n MAX_500_RETRIES = 5\n\n # https://docling-project.github.io/docling/usage/supported_formats/\n VALID_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"csv\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"docx\",\n \"htm\",\n \"html\",\n \"jpeg\",\n \"json\",\n \"md\",\n \"pdf\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"txt\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"xml\",\n \"webp\",\n ]\n\n inputs = [\n *BaseFileComponent._base_inputs,\n StrInput(\n name=\"api_url\",\n display_name=\"Server address\",\n info=\"URL of the Docling Serve instance.\",\n required=True,\n ),\n IntInput(\n name=\"max_concurrency\",\n display_name=\"Concurrency\",\n info=\"Maximum number of concurrent requests for the server.\",\n advanced=True,\n value=2,\n ),\n FloatInput(\n name=\"max_poll_timeout\",\n display_name=\"Maximum poll time\",\n info=\"Maximum waiting time for the document conversion to complete.\",\n advanced=True,\n value=3600,\n ),\n NestedDictInput(\n name=\"api_headers\",\n display_name=\"HTTP headers\",\n advanced=True,\n required=False,\n info=(\"Optional dictionary of additional headers required for connecting to Docling Serve.\"),\n ),\n NestedDictInput(\n name=\"docling_serve_opts\",\n display_name=\"Docling options\",\n advanced=True,\n required=False,\n info=(\n \"Optional dictionary of additional options. 
\"\n \"See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information.\"\n ),\n ),\n ]\n\n outputs = [\n *BaseFileComponent._base_outputs,\n ]\n\n def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:\n base_url = f\"{self.api_url}/v1alpha\"\n\n def _convert_document(client: httpx.Client, file_path: Path, options: dict[str, Any]) -> Data | None:\n encoded_doc = base64.b64encode(file_path.read_bytes()).decode()\n payload = {\n \"options\": options,\n \"file_sources\": [{\"base64_string\": encoded_doc, \"filename\": file_path.name}],\n }\n\n response = client.post(f\"{base_url}/convert/source/async\", json=payload)\n response.raise_for_status()\n task = response.json()\n\n http_failures = 0\n retry_status_start = 500\n retry_status_end = 600\n start_wait_time = time.monotonic()\n while task[\"task_status\"] not in (\"success\", \"failure\"):\n # Check if processing exceeds the maximum poll timeout\n processing_time = time.monotonic() - start_wait_time\n if processing_time >= self.max_poll_timeout:\n msg = (\n f\"Processing time {processing_time=} exceeds the maximum poll timeout {self.max_poll_timeout=}.\"\n \"Please increase the max_poll_timeout parameter or review why the processing \"\n \"takes long on the server.\"\n )\n self.log(msg)\n raise RuntimeError(msg)\n\n # Call for a new status update\n time.sleep(2)\n response = client.get(f\"{base_url}/status/poll/{task['task_id']}\")\n\n # Check if the status call gets into 5xx errors and retry\n if retry_status_start <= response.status_code < retry_status_end:\n http_failures += 1\n if http_failures > self.MAX_500_RETRIES:\n self.log(f\"The status requests got a http response {response.status_code} too many times.\")\n return None\n continue\n\n # Update task status\n task = response.json()\n\n result_resp = client.get(f\"{base_url}/result/{task['task_id']}\")\n result_resp.raise_for_status()\n result = result_resp.json()\n\n if \"json_content\" not in result[\"document\"] or result[\"document\"][\"json_content\"] is None:\n self.log(\"No JSON DoclingDocument found in the result.\")\n return None\n\n try:\n doc = DoclingDocument.model_validate(result[\"document\"][\"json_content\"])\n return Data(data={\"doc\": doc, \"file_path\": str(file_path)})\n except ValidationError as e:\n self.log(f\"Error validating the document. 
{e}\")\n return None\n\n docling_options = {\n \"to_formats\": [\"json\"],\n \"image_export_mode\": \"placeholder\",\n \"return_as_file\": False,\n **(self.docling_serve_opts or {}),\n }\n\n processed_data: list[Data | None] = []\n with (\n httpx.Client(headers=self.api_headers) as client,\n ThreadPoolExecutor(max_workers=self.max_concurrency) as executor,\n ):\n futures: list[tuple[int, Future]] = []\n for i, file in enumerate(file_list):\n if file.path is None:\n processed_data.append(None)\n continue\n\n futures.append((i, executor.submit(_convert_document, client, file.path, docling_options)))\n\n for _index, future in futures:\n try:\n result_data = future.result()\n processed_data.append(result_data)\n except (httpx.HTTPStatusError, httpx.RequestError, KeyError, ValueError) as exc:\n self.log(f\"Docling remote processing failed: {exc}\")\n raise\n\n return self.rollup_data(file_list, processed_data)\n" + "value": "import base64\nimport time\nfrom concurrent.futures import Future, ThreadPoolExecutor\nfrom pathlib import Path\nfrom typing import Any\n\nimport httpx\nfrom docling_core.types.doc import DoclingDocument\nfrom pydantic import ValidationError\n\nfrom lfx.base.data import BaseFileComponent\nfrom lfx.inputs import IntInput, NestedDictInput, StrInput\nfrom lfx.inputs.inputs import FloatInput\nfrom lfx.schema import Data\n\n\nclass DoclingRemoteComponent(BaseFileComponent):\n display_name = \"Docling Serve\"\n description = \"Uses Docling to process input documents connecting to your instance of Docling Serve.\"\n documentation = \"https://docling-project.github.io/docling/\"\n trace_type = \"tool\"\n icon = \"Docling\"\n name = \"DoclingRemote\"\n\n MAX_500_RETRIES = 5\n\n # https://docling-project.github.io/docling/usage/supported_formats/\n VALID_EXTENSIONS = [\n \"adoc\",\n \"asciidoc\",\n \"asc\",\n \"bmp\",\n \"csv\",\n \"dotx\",\n \"dotm\",\n \"docm\",\n \"docx\",\n \"htm\",\n \"html\",\n \"jpeg\",\n \"json\",\n \"md\",\n \"pdf\",\n \"png\",\n \"potx\",\n \"ppsx\",\n \"pptm\",\n \"potm\",\n \"ppsm\",\n \"pptx\",\n \"tiff\",\n \"txt\",\n \"xls\",\n \"xlsx\",\n \"xhtml\",\n \"xml\",\n \"webp\",\n ]\n\n inputs = [\n *BaseFileComponent.get_base_inputs(),\n StrInput(\n name=\"api_url\",\n display_name=\"Server address\",\n info=\"URL of the Docling Serve instance.\",\n required=True,\n ),\n IntInput(\n name=\"max_concurrency\",\n display_name=\"Concurrency\",\n info=\"Maximum number of concurrent requests for the server.\",\n advanced=True,\n value=2,\n ),\n FloatInput(\n name=\"max_poll_timeout\",\n display_name=\"Maximum poll time\",\n info=\"Maximum waiting time for the document conversion to complete.\",\n advanced=True,\n value=3600,\n ),\n NestedDictInput(\n name=\"api_headers\",\n display_name=\"HTTP headers\",\n advanced=True,\n required=False,\n info=(\"Optional dictionary of additional headers required for connecting to Docling Serve.\"),\n ),\n NestedDictInput(\n name=\"docling_serve_opts\",\n display_name=\"Docling options\",\n advanced=True,\n required=False,\n info=(\n \"Optional dictionary of additional options. 
\"\n \"See https://github.com/docling-project/docling-serve/blob/main/docs/usage.md for more information.\"\n ),\n ),\n ]\n\n outputs = [\n *BaseFileComponent.get_base_outputs(),\n ]\n\n def process_files(self, file_list: list[BaseFileComponent.BaseFile]) -> list[BaseFileComponent.BaseFile]:\n base_url = f\"{self.api_url}/v1\"\n\n def _convert_document(client: httpx.Client, file_path: Path, options: dict[str, Any]) -> Data | None:\n encoded_doc = base64.b64encode(file_path.read_bytes()).decode()\n payload = {\n \"options\": options,\n \"sources\": [{\"kind\": \"file\", \"base64_string\": encoded_doc, \"filename\": file_path.name}],\n }\n\n response = client.post(f\"{base_url}/convert/source/async\", json=payload)\n response.raise_for_status()\n task = response.json()\n\n http_failures = 0\n retry_status_start = 500\n retry_status_end = 600\n start_wait_time = time.monotonic()\n while task[\"task_status\"] not in (\"success\", \"failure\"):\n # Check if processing exceeds the maximum poll timeout\n processing_time = time.monotonic() - start_wait_time\n if processing_time >= self.max_poll_timeout:\n msg = (\n f\"Processing time {processing_time=} exceeds the maximum poll timeout {self.max_poll_timeout=}.\"\n \"Please increase the max_poll_timeout parameter or review why the processing \"\n \"takes long on the server.\"\n )\n self.log(msg)\n raise RuntimeError(msg)\n\n # Call for a new status update\n time.sleep(2)\n response = client.get(f\"{base_url}/status/poll/{task['task_id']}\")\n\n # Check if the status call gets into 5xx errors and retry\n if retry_status_start <= response.status_code < retry_status_end:\n http_failures += 1\n if http_failures > self.MAX_500_RETRIES:\n self.log(f\"The status requests got a http response {response.status_code} too many times.\")\n return None\n continue\n\n # Update task status\n task = response.json()\n\n result_resp = client.get(f\"{base_url}/result/{task['task_id']}\")\n result_resp.raise_for_status()\n result = result_resp.json()\n\n if \"json_content\" not in result[\"document\"] or result[\"document\"][\"json_content\"] is None:\n self.log(\"No JSON DoclingDocument found in the result.\")\n return None\n\n try:\n doc = DoclingDocument.model_validate(result[\"document\"][\"json_content\"])\n return Data(data={\"doc\": doc, \"file_path\": str(file_path)})\n except ValidationError as e:\n self.log(f\"Error validating the document. 
{e}\")\n return None\n\n docling_options = {\n \"to_formats\": [\"json\"],\n \"image_export_mode\": \"placeholder\",\n **(self.docling_serve_opts or {}),\n }\n\n processed_data: list[Data | None] = []\n with (\n httpx.Client(headers=self.api_headers) as client,\n ThreadPoolExecutor(max_workers=self.max_concurrency) as executor,\n ):\n futures: list[tuple[int, Future]] = []\n for i, file in enumerate(file_list):\n if file.path is None:\n processed_data.append(None)\n continue\n\n futures.append((i, executor.submit(_convert_document, client, file.path, docling_options)))\n\n for _index, future in futures:\n try:\n result_data = future.result()\n processed_data.append(result_data)\n except (httpx.HTTPStatusError, httpx.RequestError, KeyError, ValueError) as exc:\n self.log(f\"Docling remote processing failed: {exc}\")\n raise\n\n return self.rollup_data(file_list, processed_data)\n" }, "delete_server_file_after_processing": { "_input_type": "BoolInput", @@ -1364,7 +1493,7 @@ "dragging": false, "id": "DoclingRemote-78KoX", "measured": { - "height": 472, + "height": 475, "width": 320 }, "position": { @@ -1403,7 +1532,7 @@ "icon": "Docling", "legacy": false, "metadata": { - "code_hash": "451c9673bd4c", + "code_hash": "4de16ddd37ac", "dependencies": { "dependencies": [ { @@ -1411,7 +1540,7 @@ "version": "2.45.0" }, { - "name": "langflow", + "name": "lfx", "version": null } ], @@ -1474,7 +1603,7 @@ "show": true, "title_case": false, "type": "code", - "value": "from typing import Any\n\nfrom docling_core.types.doc import ImageRefMode\n\nfrom langflow.base.data.docling_utils import extract_docling_documents\nfrom langflow.custom import Component\nfrom langflow.io import DropdownInput, HandleInput, MessageTextInput, Output, StrInput\nfrom langflow.schema import Data, DataFrame\n\n\nclass ExportDoclingDocumentComponent(Component):\n display_name: str = \"Export DoclingDocument\"\n description: str = \"Export DoclingDocument to markdown, html or other formats.\"\n documentation = \"https://docling-project.github.io/docling/\"\n icon = \"Docling\"\n name = \"ExportDoclingDocument\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with documents to export.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n DropdownInput(\n name=\"export_format\",\n display_name=\"Export format\",\n options=[\"Markdown\", \"HTML\", \"Plaintext\", \"DocTags\"],\n info=\"Select the export format to convert the input.\",\n value=\"Markdown\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"image_mode\",\n display_name=\"Image export mode\",\n options=[\"placeholder\", \"embedded\"],\n info=(\n \"Specify how images are exported in the output. 
Placeholder will replace the images with a string, \"\n \"whereas Embedded will include them as base64 encoded images.\"\n ),\n value=\"placeholder\",\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder betweek pages in the markdown output.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Exported data\", name=\"data\", method=\"export_document\"),\n Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"as_dataframe\"),\n ]\n\n def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:\n if field_name == \"export_format\" and field_value == \"Markdown\":\n build_config[\"md_image_placeholder\"][\"show\"] = True\n build_config[\"md_page_break_placeholder\"][\"show\"] = True\n build_config[\"image_mode\"][\"show\"] = True\n elif field_name == \"export_format\" and field_value == \"HTML\":\n build_config[\"md_image_placeholder\"][\"show\"] = False\n build_config[\"md_page_break_placeholder\"][\"show\"] = False\n build_config[\"image_mode\"][\"show\"] = True\n elif field_name == \"export_format\" and field_value in {\"Plaintext\", \"DocTags\"}:\n build_config[\"md_image_placeholder\"][\"show\"] = False\n build_config[\"md_page_break_placeholder\"][\"show\"] = False\n build_config[\"image_mode\"][\"show\"] = False\n\n return build_config\n\n def export_document(self) -> list[Data]:\n documents = extract_docling_documents(self.data_inputs, self.doc_key)\n\n results: list[Data] = []\n try:\n image_mode = ImageRefMode(self.image_mode)\n for doc in documents:\n content = \"\"\n if self.export_format == \"Markdown\":\n content = doc.export_to_markdown(\n image_mode=image_mode,\n image_placeholder=self.md_image_placeholder,\n page_break_placeholder=self.md_page_break_placeholder,\n )\n elif self.export_format == \"HTML\":\n content = doc.export_to_html(image_mode=image_mode)\n elif self.export_format == \"Plaintext\":\n content = doc.export_to_text()\n elif self.export_format == \"DocTags\":\n content = doc.export_to_doctags()\n\n results.append(Data(text=content))\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n return results\n\n def as_dataframe(self) -> DataFrame:\n return DataFrame(self.export_document())\n" + "value": "from typing import Any\n\nfrom docling_core.types.doc import ImageRefMode\n\nfrom lfx.base.data.docling_utils import extract_docling_documents\nfrom lfx.custom import Component\nfrom lfx.io import DropdownInput, HandleInput, MessageTextInput, Output, StrInput\nfrom lfx.schema import Data, DataFrame\n\n\nclass ExportDoclingDocumentComponent(Component):\n display_name: str = \"Export DoclingDocument\"\n description: str = \"Export DoclingDocument to markdown, html or other formats.\"\n documentation = \"https://docling-project.github.io/docling/\"\n icon = \"Docling\"\n name = \"ExportDoclingDocument\"\n\n inputs = [\n HandleInput(\n name=\"data_inputs\",\n display_name=\"Data or DataFrame\",\n info=\"The data with documents to export.\",\n input_types=[\"Data\", \"DataFrame\"],\n required=True,\n ),\n DropdownInput(\n 
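# NOTE (comment added for clarity): each export format below maps to a DoclingDocument method in export_document(), e.g. \"Markdown\" -> doc.export_to_markdown().\n 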
name=\"export_format\",\n display_name=\"Export format\",\n options=[\"Markdown\", \"HTML\", \"Plaintext\", \"DocTags\"],\n info=\"Select the export format to convert the input.\",\n value=\"Markdown\",\n real_time_refresh=True,\n ),\n DropdownInput(\n name=\"image_mode\",\n display_name=\"Image export mode\",\n options=[\"placeholder\", \"embedded\"],\n info=(\n \"Specify how images are exported in the output. Placeholder will replace the images with a string, \"\n \"whereas Embedded will include them as base64 encoded images.\"\n ),\n value=\"placeholder\",\n ),\n StrInput(\n name=\"md_image_placeholder\",\n display_name=\"Image placeholder\",\n info=\"Specify the image placeholder for markdown exports.\",\n value=\"\",\n advanced=True,\n ),\n StrInput(\n name=\"md_page_break_placeholder\",\n display_name=\"Page break placeholder\",\n info=\"Add this placeholder betweek pages in the markdown output.\",\n value=\"\",\n advanced=True,\n ),\n MessageTextInput(\n name=\"doc_key\",\n display_name=\"Doc Key\",\n info=\"The key to use for the DoclingDocument column.\",\n value=\"doc\",\n advanced=True,\n ),\n ]\n\n outputs = [\n Output(display_name=\"Exported data\", name=\"data\", method=\"export_document\"),\n Output(display_name=\"DataFrame\", name=\"dataframe\", method=\"as_dataframe\"),\n ]\n\n def update_build_config(self, build_config: dict, field_value: Any, field_name: str | None = None) -> dict:\n if field_name == \"export_format\" and field_value == \"Markdown\":\n build_config[\"md_image_placeholder\"][\"show\"] = True\n build_config[\"md_page_break_placeholder\"][\"show\"] = True\n build_config[\"image_mode\"][\"show\"] = True\n elif field_name == \"export_format\" and field_value == \"HTML\":\n build_config[\"md_image_placeholder\"][\"show\"] = False\n build_config[\"md_page_break_placeholder\"][\"show\"] = False\n build_config[\"image_mode\"][\"show\"] = True\n elif field_name == \"export_format\" and field_value in {\"Plaintext\", \"DocTags\"}:\n build_config[\"md_image_placeholder\"][\"show\"] = False\n build_config[\"md_page_break_placeholder\"][\"show\"] = False\n build_config[\"image_mode\"][\"show\"] = False\n\n return build_config\n\n def export_document(self) -> list[Data]:\n documents = extract_docling_documents(self.data_inputs, self.doc_key)\n\n results: list[Data] = []\n try:\n image_mode = ImageRefMode(self.image_mode)\n for doc in documents:\n content = \"\"\n if self.export_format == \"Markdown\":\n content = doc.export_to_markdown(\n image_mode=image_mode,\n image_placeholder=self.md_image_placeholder,\n page_break_placeholder=self.md_page_break_placeholder,\n )\n elif self.export_format == \"HTML\":\n content = doc.export_to_html(image_mode=image_mode)\n elif self.export_format == \"Plaintext\":\n content = doc.export_to_text()\n elif self.export_format == \"DocTags\":\n content = doc.export_to_doctags()\n\n results.append(Data(text=content))\n except Exception as e:\n msg = f\"Error splitting text: {e}\"\n raise TypeError(msg) from e\n\n return results\n\n def as_dataframe(self) -> DataFrame:\n return DataFrame(self.export_document())\n" }, "data_inputs": { "_input_type": "HandleInput", @@ -1527,6 +1656,7 @@ "dialog_inputs": {}, "display_name": "Export format", "dynamic": false, + "external_options": {}, "info": "Select the export format to convert the input.", "name": "export_format", "options": [ @@ -1554,6 +1684,7 @@ "dialog_inputs": {}, "display_name": "Image export mode", "dynamic": false, + "external_options": {}, "info": "Specify how images are exported 
in the output. Placeholder will replace the images with a string, whereas Embedded will include them as base64 encoded images.", "name": "image_mode", "options": [ @@ -1619,7 +1750,7 @@ "dragging": false, "id": "ExportDoclingDocument-xFoCI", "measured": { - "height": 344, + "height": 347, "width": 320 }, "position": { @@ -1657,7 +1788,7 @@ ], "frozen": false, "icon": "binary", - "last_updated": "2025-09-22T15:54:52.885Z", + "last_updated": "2025-09-29T18:46:06.711Z", "legacy": false, "metadata": { "code_hash": "93faf11517da", @@ -1935,21 +2066,740 @@ "x": 1726.6943524438122, "y": 1800.5330404375484 }, - "selected": true, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "AdvancedDynamicFormBuilder-rDFKw", + "node": { + "base_classes": [ + "Data", + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Creates dynamic input fields that can receive data from other components or manual input.", + "display_name": "Create Data", + "documentation": "", + "edited": true, + "field_order": [ + "form_fields", + "include_metadata" + ], + "frozen": false, + "icon": "braces", + "last_updated": "2025-09-29T18:46:20.356Z", + "legacy": false, + "lf_version": "1.6.0", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Data", + "group_outputs": false, + "hidden": null, + "method": "process_form", + "name": "form_data", + "options": null, + "required_inputs": null, + "selected": "Data", + "tool_mode": true, + "types": [ + "Data" + ], + "value": "__UNDEFINED__" + }, + { + "allows_loop": false, + "cache": true, + "display_name": "Message", + "group_outputs": false, + "hidden": null, + "method": "get_message", + "name": "message", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from typing import Any\r\n\r\nfrom langflow.custom import Component\r\nfrom langflow.io import (\r\n BoolInput,\r\n FloatInput,\r\n HandleInput,\r\n IntInput,\r\n MultilineInput,\r\n Output,\r\n StrInput,\r\n TableInput,\r\n)\r\nfrom langflow.schema.data import Data\r\nfrom langflow.schema.message import Message\r\n\r\n\r\nclass CrateData(Component):\r\n \"\"\"Dynamic Form Component\r\n\r\n This component creates dynamic inputs that can receive data from other components\r\n or be filled manually. 
It demonstrates advanced dynamic input functionality with\r\n component connectivity.\r\n\r\n ## Features\r\n - **Dynamic Input Generation**: Create inputs based on table configuration\r\n - **Component Connectivity**: Inputs can receive data from other components\r\n - **Multiple Input Types**: Support for text, number, boolean, and handle inputs\r\n - **Flexible Data Sources**: Manual input OR component connections\r\n - **Real-time Updates**: Form fields update immediately when table changes\r\n - **Multiple Output Formats**: Data and formatted Message outputs\r\n - **JSON Output**: Collects all dynamic inputs into a structured JSON response\r\n\r\n ## Use Cases\r\n - Dynamic API parameter collection from multiple sources\r\n - Variable data aggregation from different components\r\n - Flexible pipeline configuration\r\n - Multi-source data processing\r\n\r\n ## Field Types Available\r\n - **text**: Single-line text input (can connect to Text/String outputs)\r\n - **multiline**: Multi-line text input (can connect to Text outputs)\r\n - **number**: Integer input (can connect to Number outputs)\r\n - **float**: Decimal number input (can connect to Number outputs)\r\n - **boolean**: True/false checkbox (can connect to Boolean outputs)\r\n - **handle**: Generic data input (can connect to any component output)\r\n - **data**: Structured data input (can connect to Data outputs)\r\n\r\n ## Input Types for Connections\r\n - **Text**: Text/String data from components\r\n - **Data**: Structured data objects\r\n - **Message**: Message objects with text content\r\n - **Number**: Numeric values\r\n - **Boolean**: True/false values\r\n - **Any**: Accepts any type of connection\r\n - **Combinations**: Text,Message | Data,Text | Text,Data,Message | etc.\r\n \"\"\"\r\n\r\n display_name = \"Create Data\"\r\n description = \"Creates dynamic input fields that can receive data from other components or manual input.\"\r\n icon = \"braces\"\r\n name = \"AdvancedDynamicFormBuilder\"\r\n\r\n def __init__(self, **kwargs):\r\n super().__init__(**kwargs)\r\n self._dynamic_inputs = {}\r\n\r\n inputs = [\r\n TableInput(\r\n name=\"form_fields\",\r\n display_name=\"Input Configuration\",\r\n info=\"Define the dynamic form fields. 
Each row creates a new input field that can connect to other components.\",\r\n table_schema=[\r\n {\r\n \"name\": \"field_name\",\r\n \"display_name\": \"Field Name\",\r\n \"type\": \"str\",\r\n \"description\": \"Name for the field (used as both internal name and display label)\",\r\n },\r\n {\r\n \"name\": \"field_type\",\r\n \"display_name\": \"Field Type\",\r\n \"type\": \"str\",\r\n \"description\": \"Type of input field to create\",\r\n \"options\": [\"Text\", \"Data\", \"Number\", \"Handle\", \"Boolean\"],\r\n \"value\": \"Text\",\r\n },\r\n ],\r\n value=[{\"field_name\": \"field_name\", \"field_type\": \"Text\"}],\r\n real_time_refresh=True,\r\n ),\r\n BoolInput(\r\n name=\"include_metadata\",\r\n display_name=\"Include Metadata\",\r\n info=\"Include form configuration metadata in the output.\",\r\n value=False,\r\n advanced=True,\r\n ),\r\n ]\r\n\r\n outputs = [\r\n Output(display_name=\"Data\", name=\"form_data\", method=\"process_form\"),\r\n Output(display_name=\"Message\", name=\"message\", method=\"get_message\"),\r\n ]\r\n\r\n def update_build_config(self, build_config: dict, field_value: Any, field_name: str = None) -> dict:\r\n \"\"\"Update build configuration to add dynamic inputs that can connect to other components.\"\"\"\r\n if field_name == \"form_fields\":\r\n # Store current values before clearing dynamic inputs\r\n current_values = {}\r\n keys_to_remove = [key for key in build_config if key.startswith(\"dynamic_\")]\r\n for key in keys_to_remove:\r\n # Preserve the current value before deletion\r\n if hasattr(self, key):\r\n current_values[key] = getattr(self, key)\r\n del build_config[key]\r\n\r\n # Add dynamic inputs based on table configuration\r\n # Safety check to ensure field_value is not None and is iterable\r\n if field_value is None:\r\n field_value = []\r\n\r\n for i, field_config in enumerate(field_value):\r\n # Safety check to ensure field_config is not None\r\n if field_config is None:\r\n continue\r\n\r\n field_name = field_config.get(\"field_name\", f\"field_{i}\")\r\n display_name = field_name # Use field_name as display_name\r\n field_type_option = field_config.get(\"field_type\", \"Text\")\r\n default_value = \"\" # All fields have empty default value\r\n required = False # All fields are optional by default\r\n help_text = \"\" # All fields have empty help text\r\n\r\n # Map field type options to actual field types and input types\r\n field_type_mapping = {\r\n \"Text\": {\"field_type\": \"multiline\", \"input_types\": [\"Text\", \"Message\"]},\r\n \"Data\": {\"field_type\": \"data\", \"input_types\": [\"Data\"]},\r\n \"Number\": {\"field_type\": \"number\", \"input_types\": [\"Text\", \"Message\"]},\r\n \"Handle\": {\"field_type\": \"handle\", \"input_types\": [\"Text\", \"Data\", \"Message\"]},\r\n \"Boolean\": {\"field_type\": \"boolean\", \"input_types\": None},\r\n }\r\n\r\n field_config_mapped = field_type_mapping.get(\r\n field_type_option, {\"field_type\": \"text\", \"input_types\": []}\r\n )\r\n field_type = field_config_mapped[\"field_type\"]\r\n input_types_list = field_config_mapped[\"input_types\"]\r\n\r\n # Create the appropriate input type based on field_type\r\n dynamic_input_name = f\"dynamic_{field_name}\"\r\n\r\n if field_type == \"text\":\r\n # Use preserved value if available, otherwise use default\r\n current_value = current_values.get(dynamic_input_name, default_value)\r\n if current_value is None:\r\n current_value = default_value\r\n \r\n if input_types_list:\r\n build_config[dynamic_input_name] = StrInput(\r\n 
name=dynamic_input_name,\r\n display_name=display_name,\r\n info=f\"{help_text} (Can connect to: {', '.join(input_types_list)})\",\r\n value=current_value,\r\n required=required,\r\n input_types=input_types_list,\r\n )\r\n else:\r\n build_config[dynamic_input_name] = StrInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=help_text,\r\n value=current_value,\r\n required=required,\r\n )\r\n\r\n elif field_type == \"multiline\":\r\n # Use preserved value if available, otherwise use default\r\n current_value = current_values.get(dynamic_input_name, default_value)\r\n if current_value is None:\r\n current_value = default_value\r\n \r\n if input_types_list:\r\n build_config[dynamic_input_name] = MultilineInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=f\"{help_text} (Can connect to: {', '.join(input_types_list)})\",\r\n value=current_value,\r\n required=required,\r\n input_types=input_types_list,\r\n )\r\n else:\r\n build_config[dynamic_input_name] = MultilineInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=help_text,\r\n value=current_value,\r\n required=required,\r\n )\r\n\r\n elif field_type == \"number\":\r\n # Use preserved value if available, otherwise use default\r\n current_value = current_values.get(dynamic_input_name, default_value)\r\n if current_value is None:\r\n current_value = default_value\r\n \r\n try:\r\n if current_value:\r\n current_int = int(current_value)\r\n else:\r\n current_int = 0\r\n except (ValueError, TypeError):\r\n try:\r\n current_int = int(default_value) if default_value else 0\r\n except ValueError:\r\n current_int = 0\r\n\r\n if input_types_list:\r\n build_config[dynamic_input_name] = IntInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=f\"{help_text} (Can connect to: {', '.join(input_types_list)})\",\r\n value=current_int,\r\n required=required,\r\n input_types=input_types_list,\r\n )\r\n else:\r\n build_config[dynamic_input_name] = IntInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=help_text,\r\n value=current_int,\r\n required=required,\r\n )\r\n\r\n elif field_type == \"float\":\r\n # Use preserved value if available, otherwise use default\r\n current_value = current_values.get(dynamic_input_name, default_value)\r\n if current_value is None:\r\n current_value = default_value\r\n \r\n try:\r\n if current_value:\r\n current_float = float(current_value)\r\n else:\r\n current_float = 0.0\r\n except (ValueError, TypeError):\r\n try:\r\n current_float = float(default_value) if default_value else 0.0\r\n except ValueError:\r\n current_float = 0.0\r\n\r\n if input_types_list:\r\n build_config[dynamic_input_name] = FloatInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=f\"{help_text} (Can connect to: {', '.join(input_types_list)})\",\r\n value=current_float,\r\n required=required,\r\n input_types=input_types_list,\r\n )\r\n else:\r\n build_config[dynamic_input_name] = FloatInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=help_text,\r\n value=current_float,\r\n required=required,\r\n )\r\n\r\n elif field_type == \"boolean\":\r\n # Use preserved value if available, otherwise use default\r\n current_value = current_values.get(dynamic_input_name, default_value)\r\n if current_value is None:\r\n current_value = default_value\r\n \r\n # Convert current value to boolean\r\n if isinstance(current_value, bool):\r\n current_bool = current_value\r\n else:\r\n current_bool = 
str(current_value).lower() in [\"true\", \"1\", \"yes\"] if current_value else False\r\n\r\n # Boolean fields don't use input_types parameter to avoid errors\r\n build_config[dynamic_input_name] = BoolInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=help_text,\r\n value=current_bool,\r\n input_types=[],\r\n required=required,\r\n )\r\n\r\n elif field_type == \"handle\":\r\n # HandleInput for generic data connections\r\n build_config[dynamic_input_name] = HandleInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=f\"{help_text} (Accepts: {', '.join(input_types_list) if input_types_list else 'Any'})\",\r\n input_types=input_types_list if input_types_list else [\"Data\", \"Text\", \"Message\"],\r\n required=required,\r\n )\r\n\r\n elif field_type == \"data\":\r\n # Specialized for Data type connections\r\n build_config[dynamic_input_name] = HandleInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=f\"{help_text} (Data input)\",\r\n input_types=[\"Data\"] if not input_types_list else input_types_list,\r\n required=required,\r\n )\r\n\r\n else:\r\n # Default to text input for unknown types\r\n # Use preserved value if available, otherwise use default\r\n current_value = current_values.get(dynamic_input_name, default_value)\r\n if current_value is None:\r\n current_value = default_value\r\n \r\n build_config[dynamic_input_name] = StrInput(\r\n name=dynamic_input_name,\r\n display_name=display_name,\r\n info=f\"{help_text} (Unknown type '{field_type}', defaulting to text)\",\r\n value=current_value,\r\n required=required,\r\n )\r\n\r\n return build_config\r\n\r\n def get_dynamic_values(self) -> dict[str, Any]:\r\n \"\"\"Extract simple values from all dynamic inputs, handling both manual and connected inputs.\"\"\"\r\n dynamic_values = {}\r\n connection_info = {}\r\n form_fields = getattr(self, \"form_fields\", [])\r\n\r\n for field_config in form_fields:\r\n # Safety check to ensure field_config is not None\r\n if field_config is None:\r\n continue\r\n\r\n field_name = field_config.get(\"field_name\", \"\")\r\n if field_name:\r\n dynamic_input_name = f\"dynamic_{field_name}\"\r\n value = getattr(self, dynamic_input_name, None)\r\n\r\n # Extract simple values from connections or manual input\r\n if value is not None:\r\n try:\r\n extracted_value = self._extract_simple_value(value)\r\n dynamic_values[field_name] = extracted_value\r\n\r\n # Determine connection type for status\r\n if hasattr(value, \"text\") and hasattr(value, \"timestamp\"):\r\n connection_info[field_name] = \"Connected (Message)\"\r\n elif hasattr(value, \"data\"):\r\n connection_info[field_name] = \"Connected (Data)\"\r\n elif isinstance(value, (str, int, float, bool, list, dict)):\r\n connection_info[field_name] = \"Manual input\"\r\n else:\r\n connection_info[field_name] = \"Connected (Object)\"\r\n\r\n except Exception:\r\n # Fallback to string representation if all else fails\r\n dynamic_values[field_name] = str(value)\r\n connection_info[field_name] = \"Error\"\r\n else:\r\n # Use empty default value if nothing connected\r\n dynamic_values[field_name] = \"\"\r\n connection_info[field_name] = \"Empty default\"\r\n\r\n # Store connection info for status output\r\n self._connection_info = connection_info\r\n return dynamic_values\r\n\r\n def _extract_simple_value(self, value: Any) -> Any:\r\n \"\"\"Extract the simplest, most useful value from any input type.\"\"\"\r\n # Handle None\r\n if value is None:\r\n return None\r\n\r\n # Handle simple types 
directly\r\n if isinstance(value, (str, int, float, bool)):\r\n return value\r\n\r\n # Handle lists and tuples - keep simple\r\n if isinstance(value, (list, tuple)):\r\n return [self._extract_simple_value(item) for item in value]\r\n\r\n # Handle dictionaries - keep simple\r\n if isinstance(value, dict):\r\n return {str(k): self._extract_simple_value(v) for k, v in value.items()}\r\n\r\n # Handle Message objects - extract only the text\r\n if hasattr(value, \"text\"):\r\n return str(value.text) if value.text is not None else \"\"\r\n\r\n # Handle Data objects - extract the data content\r\n if hasattr(value, \"data\") and value.data is not None:\r\n return self._extract_simple_value(value.data)\r\n\r\n # For any other object, convert to string\r\n return str(value)\r\n\r\n def process_form(self) -> Data:\r\n \"\"\"Process all dynamic form inputs and return clean data with just field values.\"\"\"\r\n # Get all dynamic values (just the key:value pairs)\r\n dynamic_values = self.get_dynamic_values()\r\n\r\n # Update status with connection info\r\n connected_fields = len([v for v in getattr(self, \"_connection_info\", {}).values() if \"Connected\" in v])\r\n total_fields = len(dynamic_values)\r\n\r\n self.status = f\"Form processed successfully. {connected_fields}/{total_fields} fields connected to components.\"\r\n\r\n # Return clean Data object with just the field values\r\n return Data(data=dynamic_values)\r\n\r\n def get_message(self) -> Message:\r\n \"\"\"Return form data as a formatted text message.\"\"\"\r\n # Get all dynamic values\r\n dynamic_values = self.get_dynamic_values()\r\n\r\n if not dynamic_values:\r\n return Message(text=\"No form data available\")\r\n\r\n # Format as text message\r\n message_lines = [\"📋 Form Data:\"]\r\n message_lines.append(\"=\" * 40)\r\n\r\n for field_name, value in dynamic_values.items():\r\n # Use field_name as display_name\r\n display_name = field_name\r\n\r\n message_lines.append(f\"• {display_name}: {value}\")\r\n\r\n message_lines.append(\"=\" * 40)\r\n message_lines.append(f\"Total fields: {len(dynamic_values)}\")\r\n\r\n message_text = \"\\n\".join(message_lines)\r\n self.status = f\"Message formatted with {len(dynamic_values)} fields\"\r\n\r\n return Message(text=message_text)" + }, + "dynamic_connector_type": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "connector_type", + "dynamic": false, + "helper_text": null, + "info": " (Can connect to: Text, Message)", + "input_types": [ + "Text", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "dynamic_connector_type", + "placeholder": "", + "real_time_refresh": null, + "refresh_button": null, + "refresh_button_text": null, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "dynamic_owner": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "owner", + "dynamic": false, + "helper_text": null, + "info": " (Can connect to: Text, Message)", + "input_types": [ + "Text", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "dynamic_owner", + "placeholder": "", + "real_time_refresh": null, + "refresh_button": null, + "refresh_button_text": null, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": 
true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "dynamic_owner_email": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "owner_email", + "dynamic": false, + "helper_text": null, + "info": " (Can connect to: Text, Message)", + "input_types": [ + "Text", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "dynamic_owner_email", + "placeholder": "", + "real_time_refresh": null, + "refresh_button": null, + "refresh_button_text": null, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "dynamic_owner_name": { + "_input_type": "MultilineInput", + "advanced": false, + "copy_field": false, + "display_name": "owner_name", + "dynamic": false, + "helper_text": null, + "info": " (Can connect to: Text, Message)", + "input_types": [ + "Text", + "Message" + ], + "list": false, + "list_add_label": "Add More", + "load_from_db": false, + "multiline": true, + "name": "dynamic_owner_name", + "placeholder": "", + "real_time_refresh": null, + "refresh_button": null, + "refresh_button_text": null, + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_input": true, + "trace_as_metadata": true, + "type": "str", + "value": "" + }, + "form_fields": { + "_input_type": "TableInput", + "advanced": false, + "display_name": "Input Configuration", + "dynamic": false, + "info": "Define the dynamic form fields. Each row creates a new input field that can connect to other components.", + "is_list": true, + "list_add_label": "Add More", + "load_from_db": false, + "name": "form_fields", + "placeholder": "", + "real_time_refresh": true, + "required": false, + "show": true, + "table_icon": "Table", + "table_schema": { + "columns": [ + { + "default": "None", + "description": "Name for the field (used as both internal name and display label)", + "disable_edit": false, + "display_name": "Field Name", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "field_name", + "sortable": true, + "type": "str" + }, + { + "default": "None", + "description": "Type of input field to create", + "disable_edit": false, + "display_name": "Field Type", + "edit_mode": "popover", + "filterable": true, + "formatter": "text", + "hidden": false, + "name": "field_type", + "options": [ + "Text", + "Data", + "Number", + "Handle", + "Boolean" + ], + "sortable": true, + "type": "str" + } + ] + }, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "trigger_icon": "Table", + "trigger_text": "Open table", + "type": "table", + "value": [ + { + "field_name": "owner", + "field_type": "Text" + }, + { + "field_name": "owner_name", + "field_type": "Text" + }, + { + "field_name": "owner_email", + "field_type": "Text" + }, + { + "field_name": "connector_type", + "field_type": "Text" + } + ] + }, + "include_metadata": { + "_input_type": "BoolInput", + "advanced": true, + "display_name": "Include Metadata", + "dynamic": false, + "info": "Include form configuration metadata in the output.", + "list": false, + "list_add_label": "Add More", + "name": "include_metadata", + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "tool_mode": false, + "trace_as_metadata": true, + "type": "bool", + "value": false + } + }, + "tool_mode": false + }, + "selected_output": 
"form_data", + "showNode": true, + "type": "AdvancedDynamicFormBuilder" + }, + "dragging": false, + "id": "AdvancedDynamicFormBuilder-rDFKw", + "measured": { + "height": 552, + "width": 320 + }, + "position": { + "x": 1302.3946037119863, + "y": 2189.6234403785384 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "SecretInput-GddHQ", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Allows the selection of a secret to be generated as output..", + "display_name": "Secret Input", + "documentation": "https://docs.langflow.org/components-io#text-input", + "edited": true, + "field_order": [ + "input_value" + ], + "frozen": false, + "icon": "type", + "legacy": false, + "lf_version": "1.6.0", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Text", + "group_outputs": false, + "hidden": null, + "method": "text_response", + "name": "text", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langflow.base.io.text import TextComponent\r\nfrom langflow.io import MultilineInput, Output, SecretStrInput\r\nfrom langflow.schema.message import Message\r\n\r\n\r\nclass SecretInputComponent(TextComponent):\r\n display_name = \"Secret Input\"\r\n description = \"Allows the selection of a secret to be generated as output..\"\r\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\r\n icon = \"type\"\r\n name = \"SecretInput\"\r\n\r\n inputs = [\r\n SecretStrInput(\r\n name=\"input_value\",\r\n display_name=\"Secret\",\r\n info=\"Secret to be passed as input.\",\r\n ),\r\n ]\r\n outputs = [\r\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\r\n ]\r\n\r\n def text_response(self) -> Message:\r\n return Message(\r\n text=self.input_value,\r\n )\r\n" + }, + "input_value": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Secret", + "dynamic": false, + "info": "Secret to be passed as input.", + "input_types": [], + "load_from_db": true, + "name": "input_value", + "password": true, + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "CONNECTOR_TYPE" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "SecretInput" + }, + "dragging": false, + "id": "SecretInput-GddHQ", + "measured": { + "height": 220, + "width": 320 + }, + "position": { + "x": 577.3448254571293, + "y": 2047.4618586798306 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "SecretInput-8QSeL", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Allows the selection of a secret to be generated as output..", + "display_name": "Secret Input", + "documentation": "https://docs.langflow.org/components-io#text-input", + "edited": true, + "field_order": [ + "input_value" + ], + "frozen": false, + "icon": "type", + 
"legacy": false, + "lf_version": "1.6.0", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Text", + "group_outputs": false, + "hidden": null, + "method": "text_response", + "name": "text", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langflow.base.io.text import TextComponent\r\nfrom langflow.io import MultilineInput, Output, SecretStrInput\r\nfrom langflow.schema.message import Message\r\n\r\n\r\nclass SecretInputComponent(TextComponent):\r\n display_name = \"Secret Input\"\r\n description = \"Allows the selection of a secret to be generated as output..\"\r\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\r\n icon = \"type\"\r\n name = \"SecretInput\"\r\n\r\n inputs = [\r\n SecretStrInput(\r\n name=\"input_value\",\r\n display_name=\"Secret\",\r\n info=\"Secret to be passed as input.\",\r\n ),\r\n ]\r\n outputs = [\r\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\r\n ]\r\n\r\n def text_response(self) -> Message:\r\n return Message(\r\n text=self.input_value,\r\n )\r\n" + }, + "input_value": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Secret", + "dynamic": false, + "info": "Secret to be passed as input.", + "input_types": [], + "load_from_db": true, + "name": "input_value", + "password": true, + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "OWNER" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "SecretInput" + }, + "dragging": false, + "id": "SecretInput-8QSeL", + "measured": { + "height": 220, + "width": 320 + }, + "position": { + "x": 577.7412962636911, + "y": 2323.7696210844183 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "SecretInput-qdu4S", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Allows the selection of a secret to be generated as output..", + "display_name": "Secret Input", + "documentation": "https://docs.langflow.org/components-io#text-input", + "edited": true, + "field_order": [ + "input_value" + ], + "frozen": false, + "icon": "type", + "legacy": false, + "lf_version": "1.6.0", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Text", + "group_outputs": false, + "hidden": null, + "method": "text_response", + "name": "text", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + 
"title_case": false, + "type": "code", + "value": "from langflow.base.io.text import TextComponent\r\nfrom langflow.io import MultilineInput, Output, SecretStrInput\r\nfrom langflow.schema.message import Message\r\n\r\n\r\nclass SecretInputComponent(TextComponent):\r\n display_name = \"Secret Input\"\r\n description = \"Allows the selection of a secret to be generated as output..\"\r\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\r\n icon = \"type\"\r\n name = \"SecretInput\"\r\n\r\n inputs = [\r\n SecretStrInput(\r\n name=\"input_value\",\r\n display_name=\"Secret\",\r\n info=\"Secret to be passed as input.\",\r\n ),\r\n ]\r\n outputs = [\r\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\r\n ]\r\n\r\n def text_response(self) -> Message:\r\n return Message(\r\n text=self.input_value,\r\n )\r\n" + }, + "input_value": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Secret", + "dynamic": false, + "info": "Secret to be passed as input.", + "input_types": [], + "load_from_db": true, + "name": "input_value", + "password": true, + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "OWNER_EMAIL" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "SecretInput" + }, + "dragging": false, + "id": "SecretInput-qdu4S", + "measured": { + "height": 220, + "width": 320 + }, + "position": { + "x": 573.1374442858917, + "y": 2610.3226902967785 + }, + "selected": false, + "type": "genericNode" + }, + { + "data": { + "id": "SecretInput-p9iHD", + "node": { + "base_classes": [ + "Message" + ], + "beta": false, + "conditional_paths": [], + "custom_fields": {}, + "description": "Allows the selection of a secret to be generated as output..", + "display_name": "Secret Input", + "documentation": "https://docs.langflow.org/components-io#text-input", + "edited": true, + "field_order": [ + "input_value" + ], + "frozen": false, + "icon": "type", + "legacy": false, + "lf_version": "1.6.0", + "metadata": {}, + "minimized": false, + "output_types": [], + "outputs": [ + { + "allows_loop": false, + "cache": true, + "display_name": "Output Text", + "group_outputs": false, + "hidden": null, + "method": "text_response", + "name": "text", + "options": null, + "required_inputs": null, + "selected": "Message", + "tool_mode": true, + "types": [ + "Message" + ], + "value": "__UNDEFINED__" + } + ], + "pinned": false, + "template": { + "_type": "Component", + "code": { + "advanced": true, + "dynamic": true, + "fileTypes": [], + "file_path": "", + "info": "", + "list": false, + "load_from_db": false, + "multiline": true, + "name": "code", + "password": false, + "placeholder": "", + "required": true, + "show": true, + "title_case": false, + "type": "code", + "value": "from langflow.base.io.text import TextComponent\r\nfrom langflow.io import MultilineInput, Output, SecretStrInput\r\nfrom langflow.schema.message import Message\r\n\r\n\r\nclass SecretInputComponent(TextComponent):\r\n display_name = \"Secret Input\"\r\n description = \"Allows the selection of a secret to be generated as output..\"\r\n documentation: str = \"https://docs.langflow.org/components-io#text-input\"\r\n icon = \"type\"\r\n name = \"SecretInput\"\r\n\r\n inputs = [\r\n SecretStrInput(\r\n name=\"input_value\",\r\n display_name=\"Secret\",\r\n info=\"Secret to be passed as input.\",\r\n ),\r\n ]\r\n outputs = [\r\n Output(display_name=\"Output Text\", name=\"text\", method=\"text_response\"),\r\n ]\r\n\r\n 
def text_response(self) -> Message:\r\n return Message(\r\n text=self.input_value,\r\n )\r\n" + }, + "input_value": { + "_input_type": "SecretStrInput", + "advanced": false, + "display_name": "Secret", + "dynamic": false, + "info": "Secret to be passed as input.", + "input_types": [], + "load_from_db": true, + "name": "input_value", + "password": true, + "placeholder": "", + "required": false, + "show": true, + "title_case": false, + "type": "str", + "value": "OWNER_NAME" + } + }, + "tool_mode": false + }, + "showNode": true, + "type": "SecretInput" + }, + "dragging": false, + "id": "SecretInput-p9iHD", + "measured": { + "height": 220, + "width": 320 + }, + "position": { + "x": 591.2840506536836, + "y": 2932.4192840543737 + }, + "selected": false, "type": "genericNode" } ], "viewport": { - "x": -767.6929603556041, - "y": -1196.6455082358875, - "zoom": 0.9277466102702023 + "x": 36.02462669663555, + "y": -593.1219298624992, + "zoom": 0.30094376899529046 } }, "description": "Load your data for chat context with Retrieval Augmented Generation.", "endpoint_name": null, "id": "1402618b-e6d1-4ff2-9a11-d6ce71186915", "is_component": false, - "last_tested_version": "1.5.0.post2", + "last_tested_version": "1.6.0", "name": "OpenSearch Ingestion Flow Docling Serve", "tags": [ "openai", diff --git a/flows/openrag_nudges.json b/flows/openrag_nudges.json index df4ce301..90c59abf 100644 --- a/flows/openrag_nudges.json +++ b/flows/openrag_nudges.json @@ -144,6 +144,7 @@ "targetHandle": "{œfieldNameœ:œinput_valueœ,œidœ:œLanguageModelComponent-NSTA6œ,œinputTypesœ:[œMessageœ],œtypeœ:œstrœ}" }, { + "animated": false, "className": "", "data": { "sourceHandle": { @@ -165,6 +166,7 @@ } }, "id": "xy-edge__OpenSearch-iYfjf{œdataTypeœ:œOpenSearchVectorStoreComponentœ,œidœ:œOpenSearch-iYfjfœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}-ParserComponent-tZs7s{œfieldNameœ:œinput_dataœ,œidœ:œParserComponent-tZs7sœ,œinputTypesœ:[œDataFrameœ,œDataœ],œtypeœ:œotherœ}", + "selected": false, "source": "OpenSearch-iYfjf", "sourceHandle": "{œdataTypeœ:œOpenSearchVectorStoreComponentœ,œidœ:œOpenSearch-iYfjfœ,œnameœ:œdataframeœ,œoutput_typesœ:[œDataFrameœ]}", "target": "ParserComponent-tZs7s", @@ -730,7 +732,7 @@ "icon": "OpenSearch", "legacy": false, "metadata": { - "code_hash": "2720a7c68202", + "code_hash": "c81b23acb81a", "dependencies": { "dependencies": [ { @@ -865,14 +867,17 @@ "show": true, "title_case": false, "type": "code", - "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n This component provides vector storage and retrieval using OpenSearch, combining semantic\n similarity search (KNN) with keyword-based search for optimal results. 
It supports document\n ingestion, vector embeddings, and advanced filtering with authentication options.\n\n Features:\n - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n - Hybrid search combining KNN vector similarity and keyword matching\n - Flexible authentication (Basic auth, JWT tokens)\n - Advanced filtering and aggregations\n - Metadata injection during document ingestion\n \"\"\"\n\n display_name: str = \"OpenSearch\"\n icon: str = \"OpenSearch\"\n description: str = (\n \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n )\n\n # Keys we consider baseline\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[i.name for i in LCVectorStoreComponent.inputs], # search_query, add_documents, etc.\n \"embedding\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"filter_expression\",\n \"engine\",\n \"space_type\",\n \"ef_construction\",\n \"m\",\n \"docs_metadata\",\n ]\n\n inputs = [\n TableInput(\n name=\"docs_metadata\",\n display_name=\"Document Metadata\",\n info=(\n \"Additional metadata key-value pairs to be added to all ingested documents. \"\n \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n ),\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Key name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Value of the metadata\",\n },\n ],\n value=[],\n advanced=True,\n ),\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=(\n \"The connection URL for your OpenSearch cluster \"\n \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n ),\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=(\n \"The OpenSearch index name where documents will be stored and searched. \"\n \"Will be created automatically if it doesn't exist.\"\n ),\n ),\n DropdownInput(\n name=\"engine\",\n display_name=\"Vector Engine\",\n options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n value=\"jvector\",\n info=(\n \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n ),\n advanced=True,\n ),\n DropdownInput(\n name=\"space_type\",\n display_name=\"Distance Metric\",\n options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n value=\"l2\",\n info=(\n \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"ef_construction\",\n display_name=\"EF Construction\",\n value=512,\n info=(\n \"Size of the dynamic candidate list during index construction. \"\n \"Higher values improve recall but increase indexing time and memory usage.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"m\",\n display_name=\"M Parameter\",\n value=16,\n info=(\n \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n \"Higher values improve search quality but increase memory usage and indexing time.\"\n ),\n advanced=True,\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field Name\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Default Result Limit\",\n value=10,\n advanced=True,\n info=(\n \"Default maximum number of search results to return when no limit is \"\n \"specified in the filter expression.\"\n ),\n ),\n MultilineInput(\n name=\"filter_expression\",\n display_name=\"Search Filters (JSON)\",\n value=\"\",\n info=(\n \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n \"Format 1 - Explicit filters:\\n\"\n '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n \"Format 2 - Context-style mapping:\\n\"\n '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n ),\n ),\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Authentication Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=(\n \"Authentication method: 'basic' for username/password authentication, \"\n \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n ),\n real_time_refresh=True,\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"OpenSearch Password\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"JWT\",\n load_from_db=True,\n show=True,\n info=(\n \"Valid JSON Web Token for authentication. \"\n \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n ),\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False,\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False,\n advanced=True,\n ),\n # ----- TLS -----\n BoolInput(\n name=\"use_ssl\",\n display_name=\"Use SSL/TLS\",\n value=True,\n advanced=True,\n info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n ),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify SSL Certificates\",\n value=False,\n advanced=True,\n info=(\n \"Verify SSL certificates when connecting. 
\"\n \"Disable for self-signed certificates in development environments.\"\n ),\n ),\n ]\n\n # ---------- helper functions for index management ----------\n def _default_text_mapping(\n self,\n dim: int,\n engine: str = \"jvector\",\n space_type: str = \"l2\",\n ef_search: int = 512,\n ef_construction: int = 100,\n m: int = 16,\n vector_field: str = \"vector_field\",\n ) -> dict[str, Any]:\n \"\"\"Create the default OpenSearch index mapping for vector search.\n\n This method generates the index configuration with k-NN settings optimized\n for approximate nearest neighbor search using the specified vector engine.\n\n Args:\n dim: Dimensionality of the vector embeddings\n engine: Vector search engine (jvector, nmslib, faiss, lucene)\n space_type: Distance metric for similarity calculation\n ef_search: Size of dynamic list used during search\n ef_construction: Size of dynamic list used during index construction\n m: Number of bidirectional links for each vector\n vector_field: Name of the field storing vector embeddings\n\n Returns:\n Dictionary containing OpenSearch index mapping configuration\n \"\"\"\n return {\n \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n \"mappings\": {\n \"properties\": {\n vector_field: {\n \"type\": \"knn_vector\",\n \"dimension\": dim,\n \"method\": {\n \"name\": \"disk_ann\",\n \"space_type\": space_type,\n \"engine\": engine,\n \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n },\n }\n }\n },\n }\n\n def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n Amazon OpenSearch Serverless has restrictions on which vector engines\n can be used. This method ensures the selected engine is compatible.\n\n Args:\n is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n engine: The selected vector search engine\n\n Raises:\n ValueError: If AOSS is used with an incompatible engine\n \"\"\"\n if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n raise ValueError(msg)\n\n def _is_aoss_enabled(self, http_auth: Any) -> bool:\n \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n Args:\n http_auth: The HTTP authentication object\n\n Returns:\n True if AOSS is enabled, False otherwise\n \"\"\"\n return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n def _bulk_ingest_embeddings(\n self,\n client: OpenSearch,\n index_name: str,\n embeddings: list[list[float]],\n texts: list[str],\n metadatas: list[dict] | None = None,\n ids: list[str] | None = None,\n vector_field: str = \"vector_field\",\n text_field: str = \"text\",\n mapping: dict | None = None,\n max_chunk_bytes: int | None = 1 * 1024 * 1024,\n *,\n is_aoss: bool = False,\n ) -> list[str]:\n \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n This method uses bulk operations to insert documents with their vector\n embeddings and metadata into the specified OpenSearch index.\n\n Args:\n client: OpenSearch client instance\n index_name: Target index for document storage\n embeddings: List of vector embeddings for each document\n texts: List of document texts\n metadatas: Optional metadata dictionaries for each document\n ids: Optional document IDs (UUIDs generated if not provided)\n vector_field: Field name for storing vector embeddings\n text_field: Field name for 
storing document text\n mapping: Optional index mapping configuration\n max_chunk_bytes: Maximum size per bulk request chunk\n is_aoss: Whether using Amazon OpenSearch Serverless\n\n Returns:\n List of document IDs that were successfully ingested\n \"\"\"\n if not mapping:\n mapping = {}\n\n requests = []\n return_ids = []\n\n for i, text in enumerate(texts):\n metadata = metadatas[i] if metadatas else {}\n _id = ids[i] if ids else str(uuid.uuid4())\n request = {\n \"_op_type\": \"index\",\n \"_index\": index_name,\n vector_field: embeddings[i],\n text_field: text,\n **metadata,\n }\n if is_aoss:\n request[\"id\"] = _id\n else:\n request[\"_id\"] = _id\n requests.append(request)\n return_ids.append(_id)\n if metadatas:\n self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n return return_ids\n\n # ---------- auth / client ----------\n def _build_auth_kwargs(self) -> dict[str, Any]:\n \"\"\"Build authentication configuration for OpenSearch client.\n\n Constructs the appropriate authentication parameters based on the\n selected auth mode (basic username/password or JWT token).\n\n Returns:\n Dictionary containing authentication configuration\n\n Raises:\n ValueError: If required authentication parameters are missing\n \"\"\"\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n raise ValueError(msg)\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n user = (self.username or \"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n msg = \"Auth Mode is 'basic' but username/password are missing.\"\n raise ValueError(msg)\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n \"\"\"Create and configure an OpenSearch client instance.\n\n Returns:\n Configured OpenSearch client ready for operations\n \"\"\"\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n ssl_show_warn=False,\n **auth_kwargs,\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # Return raw OpenSearch client as our “vector store.”\n self.log(self.ingest_data)\n client = self.build_client()\n self._add_documents_to_vector_store(client=client)\n return client\n\n # ---------- ingest ----------\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n This method handles the complete document ingestion pipeline:\n - Prepares document data and metadata\n - Generates vector embeddings\n - Creates appropriate index mappings\n - Bulk inserts documents with vectors\n\n Args:\n client: OpenSearch client for performing operations\n \"\"\"\n # Convert DataFrame to Data if needed using parent's method\n self.ingest_data = self._prepare_ingest_data()\n\n docs = self.ingest_data or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n # Extract texts and metadata from documents\n texts = []\n metadatas = []\n # Process docs_metadata table input into a dict\n additional_metadata = {}\n if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n for item in 
self.docs_metadata:\n if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n additional_metadata[item[\"key\"]] = item[\"value\"]\n\n for doc_obj in docs:\n data_copy = json.loads(doc_obj.model_dump_json())\n text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n texts.append(text)\n\n # Merge additional metadata from table input\n data_copy.update(additional_metadata)\n\n metadatas.append(data_copy)\n self.log(metadatas)\n if not self.embedding:\n msg = \"Embedding handle is required to embed documents.\"\n raise ValueError(msg)\n\n # Generate embeddings\n vectors = self.embedding.embed_documents(texts)\n\n if not vectors:\n self.log(\"No vectors generated from documents.\")\n return\n\n # Get vector dimension for mapping\n dim = len(vectors[0]) if vectors else 768 # default fallback\n\n # Check for AOSS\n auth_kwargs = self._build_auth_kwargs()\n is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n # Validate engine with AOSS\n engine = getattr(self, \"engine\", \"jvector\")\n self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n # Create mapping with proper KNN settings\n space_type = getattr(self, \"space_type\", \"l2\")\n ef_construction = getattr(self, \"ef_construction\", 512)\n m = getattr(self, \"m\", 16)\n\n mapping = self._default_text_mapping(\n dim=dim,\n engine=engine,\n space_type=space_type,\n ef_construction=ef_construction,\n m=m,\n vector_field=self.vector_field,\n )\n\n self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n # Use the LangChain-style bulk ingestion\n return_ids = self._bulk_ingest_embeddings(\n client=client,\n index_name=self.index_name,\n embeddings=vectors,\n texts=texts,\n metadatas=metadatas,\n vector_field=self.vector_field,\n text_field=\"text\",\n mapping=mapping,\n is_aoss=is_aoss,\n )\n self.log(metadatas)\n\n self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n # ---------- helpers for filters ----------\n def _is_placeholder_term(self, term_obj: dict) -> bool:\n # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n This method accepts two filter formats and converts them to standardized\n OpenSearch query clauses:\n\n Format A - Explicit filters:\n {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n \"limit\": 10, \"score_threshold\": 1.5}\n\n Format B - Context-style mapping:\n {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n Args:\n filter_obj: Filter configuration dictionary or None\n\n Returns:\n List of OpenSearch filter clauses (term/terms objects)\n Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n \"\"\"\n if not filter_obj:\n return []\n\n # If it is a string, try to parse it once\n if isinstance(filter_obj, str):\n try:\n filter_obj = json.loads(filter_obj)\n except json.JSONDecodeError:\n # Not valid JSON - treat as no filters\n return []\n\n # Case A: already an explicit list/dict under \"filter\"\n if \"filter\" in filter_obj:\n raw = filter_obj[\"filter\"]\n if isinstance(raw, dict):\n raw = [raw]\n explicit_clauses: list[dict] = []\n for f in raw or []:\n if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n 
explicit_clauses.append(f)\n elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n field, vals = next(iter(f[\"terms\"].items()))\n if isinstance(vals, list) and len(vals) > 0:\n explicit_clauses.append(f)\n return explicit_clauses\n\n # Case B: convert context-style maps into clauses\n field_mapping = {\n \"data_sources\": \"filename\",\n \"document_types\": \"mimetype\",\n \"owners\": \"owner\",\n }\n context_clauses: list[dict] = []\n for k, values in filter_obj.items():\n if not isinstance(values, list):\n continue\n field = field_mapping.get(k, k)\n if len(values) == 0:\n # Match-nothing placeholder (kept to mirror your tool semantics)\n context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n elif len(values) == 1:\n if values[0] != \"__IMPOSSIBLE_VALUE__\":\n context_clauses.append({\"term\": {field: values[0]}})\n else:\n context_clauses.append({\"terms\": {field: values}})\n return context_clauses\n\n # ---------- search (single hybrid path matching your tool) ----------\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n This method executes a sophisticated search that combines:\n - K-nearest neighbor (KNN) vector similarity search (70% weight)\n - Multi-field keyword search with fuzzy matching (30% weight)\n - Optional filtering and score thresholds\n - Aggregations for faceted search results\n\n Args:\n query: Search query string (used for both vector embedding and keyword search)\n\n Returns:\n List of search results with page_content, metadata, and relevance scores\n\n Raises:\n ValueError: If embedding component is not provided or filter JSON is invalid\n \"\"\"\n logger.info(self.ingest_data)\n client = self.build_client()\n q = (query or \"\").strip()\n\n # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n filter_obj = None\n if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n try:\n filter_obj = json.loads(self.filter_expression)\n except json.JSONDecodeError as e:\n msg = f\"Invalid filter_expression JSON: {e}\"\n raise ValueError(msg) from e\n\n if not self.embedding:\n msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n raise ValueError(msg)\n\n # Embed the query\n vec = self.embedding.embed_query(q)\n\n # Build filter clauses (accept both shapes)\n filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n # Respect the tool's limit/threshold defaults\n limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n # Build the same hybrid body as your SearchService\n body = {\n \"query\": {\n \"bool\": {\n \"should\": [\n {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": 10, # fixed to match the tool\n \"boost\": 0.7,\n }\n }\n },\n {\n \"multi_match\": {\n \"query\": q,\n \"fields\": [\"text^2\", \"filename^1.5\"],\n \"type\": \"best_fields\",\n \"fuzziness\": \"AUTO\",\n \"boost\": 0.3,\n }\n },\n ],\n \"minimum_should_match\": 1,\n }\n },\n \"aggs\": {\n \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n },\n \"_source\": [\n \"filename\",\n \"mimetype\",\n \"page\",\n \"text\",\n \"source_url\",\n \"owner\",\n \"allowed_users\",\n \"allowed_groups\",\n ],\n \"size\": limit,\n }\n if filter_clauses:\n 
body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n # top-level min_score (matches your tool)\n body[\"min_score\"] = score_threshold\n\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n def search_documents(self) -> list[Data]:\n \"\"\"Search documents and return results as Data objects.\n\n This is the main interface method that performs the search using the\n configured search_query and returns results in Langflow's Data format.\n\n Returns:\n List of Data objects containing search results with text and metadata\n\n Raises:\n Exception: If search operation fails\n \"\"\"\n try:\n raw = self.search(self.search_query or \"\")\n return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n self.log(self.ingest_data)\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling (auth switch) --------\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Dynamically update component configuration based on field changes.\n\n This method handles real-time UI updates, particularly for authentication\n mode changes that show/hide relevant input fields.\n\n Args:\n build_config: Current component configuration\n field_value: New value for the changed field\n field_name: Name of the field that changed\n\n Returns:\n Updated build configuration with appropriate field visibility\n \"\"\"\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n if is_basic:\n build_config[\"jwt_token\"][\"value\"] = \"\"\n\n return build_config\n\n except (KeyError, ValueError) as e:\n self.log(f\"update_build_config error: {e}\")\n\n return build_config\n" + "value": "from __future__ import annotations\n\nimport json\nimport uuid\nfrom typing import Any\n\nfrom opensearchpy import OpenSearch, helpers\n\nfrom lfx.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store\nfrom lfx.base.vectorstores.vector_store_connection_decorator import vector_store_connection\nfrom lfx.io import BoolInput, DropdownInput, HandleInput, IntInput, MultilineInput, SecretStrInput, StrInput, TableInput\nfrom lfx.log import logger\nfrom lfx.schema.data import Data\n\n\n@vector_store_connection\nclass OpenSearchVectorStoreComponent(LCVectorStoreComponent):\n \"\"\"OpenSearch Vector Store Component with Hybrid Search Capabilities.\n\n This component provides vector storage and retrieval using OpenSearch, combining semantic\n similarity search (KNN) with keyword-based search for optimal results. 
It supports document\n ingestion, vector embeddings, and advanced filtering with authentication options.\n\n Features:\n - Vector storage with configurable engines (jvector, nmslib, faiss, lucene)\n - Hybrid search combining KNN vector similarity and keyword matching\n - Flexible authentication (Basic auth, JWT tokens)\n - Advanced filtering and aggregations\n - Metadata injection during document ingestion\n \"\"\"\n\n display_name: str = \"OpenSearch\"\n icon: str = \"OpenSearch\"\n description: str = (\n \"Store and search documents using OpenSearch with hybrid semantic and keyword search capabilities.\"\n )\n\n # Keys we consider baseline\n default_keys: list[str] = [\n \"opensearch_url\",\n \"index_name\",\n *[i.name for i in LCVectorStoreComponent.inputs], # search_query, add_documents, etc.\n \"embedding\",\n \"vector_field\",\n \"number_of_results\",\n \"auth_mode\",\n \"username\",\n \"password\",\n \"jwt_token\",\n \"jwt_header\",\n \"bearer_prefix\",\n \"use_ssl\",\n \"verify_certs\",\n \"filter_expression\",\n \"engine\",\n \"space_type\",\n \"ef_construction\",\n \"m\",\n \"docs_metadata\",\n ]\n\n inputs = [\n TableInput(\n name=\"docs_metadata\",\n display_name=\"Document Metadata\",\n info=(\n \"Additional metadata key-value pairs to be added to all ingested documents. \"\n \"Useful for tagging documents with source information, categories, or other custom attributes.\"\n ),\n table_schema=[\n {\n \"name\": \"key\",\n \"display_name\": \"Key\",\n \"type\": \"str\",\n \"description\": \"Key name\",\n },\n {\n \"name\": \"value\",\n \"display_name\": \"Value\",\n \"type\": \"str\",\n \"description\": \"Value of the metadata\",\n },\n ],\n value=[],\n # advanced=True,\n input_types=[\"Data\"]\n ),\n StrInput(\n name=\"opensearch_url\",\n display_name=\"OpenSearch URL\",\n value=\"http://localhost:9200\",\n info=(\n \"The connection URL for your OpenSearch cluster \"\n \"(e.g., http://localhost:9200 for local development or your cloud endpoint).\"\n ),\n ),\n StrInput(\n name=\"index_name\",\n display_name=\"Index Name\",\n value=\"langflow\",\n info=(\n \"The OpenSearch index name where documents will be stored and searched. \"\n \"Will be created automatically if it doesn't exist.\"\n ),\n ),\n DropdownInput(\n name=\"engine\",\n display_name=\"Vector Engine\",\n options=[\"jvector\", \"nmslib\", \"faiss\", \"lucene\"],\n value=\"jvector\",\n info=(\n \"Vector search engine for similarity calculations. 'jvector' is recommended for most use cases. \"\n \"Note: Amazon OpenSearch Serverless only supports 'nmslib' or 'faiss'.\"\n ),\n advanced=True,\n ),\n DropdownInput(\n name=\"space_type\",\n display_name=\"Distance Metric\",\n options=[\"l2\", \"l1\", \"cosinesimil\", \"linf\", \"innerproduct\"],\n value=\"l2\",\n info=(\n \"Distance metric for calculating vector similarity. 'l2' (Euclidean) is most common, \"\n \"'cosinesimil' for cosine similarity, 'innerproduct' for dot product.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"ef_construction\",\n display_name=\"EF Construction\",\n value=512,\n info=(\n \"Size of the dynamic candidate list during index construction. \"\n \"Higher values improve recall but increase indexing time and memory usage.\"\n ),\n advanced=True,\n ),\n IntInput(\n name=\"m\",\n display_name=\"M Parameter\",\n value=16,\n info=(\n \"Number of bidirectional connections for each vector in the HNSW graph. 
\"\n \"Higher values improve search quality but increase memory usage and indexing time.\"\n ),\n advanced=True,\n ),\n *LCVectorStoreComponent.inputs, # includes search_query, add_documents, etc.\n HandleInput(name=\"embedding\", display_name=\"Embedding\", input_types=[\"Embeddings\"]),\n StrInput(\n name=\"vector_field\",\n display_name=\"Vector Field Name\",\n value=\"chunk_embedding\",\n advanced=True,\n info=\"Name of the field in OpenSearch documents that stores the vector embeddings for similarity search.\",\n ),\n IntInput(\n name=\"number_of_results\",\n display_name=\"Default Result Limit\",\n value=10,\n advanced=True,\n info=(\n \"Default maximum number of search results to return when no limit is \"\n \"specified in the filter expression.\"\n ),\n ),\n MultilineInput(\n name=\"filter_expression\",\n display_name=\"Search Filters (JSON)\",\n value=\"\",\n info=(\n \"Optional JSON configuration for search filtering, result limits, and score thresholds.\\n\\n\"\n \"Format 1 - Explicit filters:\\n\"\n '{\"filter\": [{\"term\": {\"filename\":\"doc.pdf\"}}, '\n '{\"terms\":{\"owner\":[\"user1\",\"user2\"]}}], \"limit\": 10, \"score_threshold\": 1.6}\\n\\n'\n \"Format 2 - Context-style mapping:\\n\"\n '{\"data_sources\":[\"file.pdf\"], \"document_types\":[\"application/pdf\"], \"owners\":[\"user123\"]}\\n\\n'\n \"Use __IMPOSSIBLE_VALUE__ as placeholder to ignore specific filters.\"\n ),\n ),\n # ----- Auth controls (dynamic) -----\n DropdownInput(\n name=\"auth_mode\",\n display_name=\"Authentication Mode\",\n value=\"basic\",\n options=[\"basic\", \"jwt\"],\n info=(\n \"Authentication method: 'basic' for username/password authentication, \"\n \"or 'jwt' for JSON Web Token (Bearer) authentication.\"\n ),\n real_time_refresh=True,\n advanced=False,\n ),\n StrInput(\n name=\"username\",\n display_name=\"Username\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"password\",\n display_name=\"OpenSearch Password\",\n value=\"admin\",\n show=False,\n ),\n SecretStrInput(\n name=\"jwt_token\",\n display_name=\"JWT Token\",\n value=\"JWT\",\n load_from_db=False,\n show=True,\n info=(\n \"Valid JSON Web Token for authentication. \"\n \"Will be sent in the Authorization header (with optional 'Bearer ' prefix).\"\n ),\n ),\n StrInput(\n name=\"jwt_header\",\n display_name=\"JWT Header Name\",\n value=\"Authorization\",\n show=False,\n advanced=True,\n ),\n BoolInput(\n name=\"bearer_prefix\",\n display_name=\"Prefix 'Bearer '\",\n value=True,\n show=False,\n advanced=True,\n ),\n # ----- TLS -----\n BoolInput(\n name=\"use_ssl\",\n display_name=\"Use SSL/TLS\",\n value=True,\n advanced=True,\n info=\"Enable SSL/TLS encryption for secure connections to OpenSearch.\",\n ),\n BoolInput(\n name=\"verify_certs\",\n display_name=\"Verify SSL Certificates\",\n value=False,\n advanced=True,\n info=(\n \"Verify SSL certificates when connecting. 
\"\n \"Disable for self-signed certificates in development environments.\"\n ),\n ),\n ]\n\n # ---------- helper functions for index management ----------\n def _default_text_mapping(\n self,\n dim: int,\n engine: str = \"jvector\",\n space_type: str = \"l2\",\n ef_search: int = 512,\n ef_construction: int = 100,\n m: int = 16,\n vector_field: str = \"vector_field\",\n ) -> dict[str, Any]:\n \"\"\"Create the default OpenSearch index mapping for vector search.\n\n This method generates the index configuration with k-NN settings optimized\n for approximate nearest neighbor search using the specified vector engine.\n\n Args:\n dim: Dimensionality of the vector embeddings\n engine: Vector search engine (jvector, nmslib, faiss, lucene)\n space_type: Distance metric for similarity calculation\n ef_search: Size of dynamic list used during search\n ef_construction: Size of dynamic list used during index construction\n m: Number of bidirectional links for each vector\n vector_field: Name of the field storing vector embeddings\n\n Returns:\n Dictionary containing OpenSearch index mapping configuration\n \"\"\"\n return {\n \"settings\": {\"index\": {\"knn\": True, \"knn.algo_param.ef_search\": ef_search}},\n \"mappings\": {\n \"properties\": {\n vector_field: {\n \"type\": \"knn_vector\",\n \"dimension\": dim,\n \"method\": {\n \"name\": \"disk_ann\",\n \"space_type\": space_type,\n \"engine\": engine,\n \"parameters\": {\"ef_construction\": ef_construction, \"m\": m},\n },\n }\n }\n },\n }\n\n def _validate_aoss_with_engines(self, *, is_aoss: bool, engine: str) -> None:\n \"\"\"Validate engine compatibility with Amazon OpenSearch Serverless (AOSS).\n\n Amazon OpenSearch Serverless has restrictions on which vector engines\n can be used. This method ensures the selected engine is compatible.\n\n Args:\n is_aoss: Whether the connection is to Amazon OpenSearch Serverless\n engine: The selected vector search engine\n\n Raises:\n ValueError: If AOSS is used with an incompatible engine\n \"\"\"\n if is_aoss and engine not in {\"nmslib\", \"faiss\"}:\n msg = \"Amazon OpenSearch Service Serverless only supports `nmslib` or `faiss` engines\"\n raise ValueError(msg)\n\n def _is_aoss_enabled(self, http_auth: Any) -> bool:\n \"\"\"Determine if Amazon OpenSearch Serverless (AOSS) is being used.\n\n Args:\n http_auth: The HTTP authentication object\n\n Returns:\n True if AOSS is enabled, False otherwise\n \"\"\"\n return http_auth is not None and hasattr(http_auth, \"service\") and http_auth.service == \"aoss\"\n\n def _bulk_ingest_embeddings(\n self,\n client: OpenSearch,\n index_name: str,\n embeddings: list[list[float]],\n texts: list[str],\n metadatas: list[dict] | None = None,\n ids: list[str] | None = None,\n vector_field: str = \"vector_field\",\n text_field: str = \"text\",\n mapping: dict | None = None,\n max_chunk_bytes: int | None = 1 * 1024 * 1024,\n *,\n is_aoss: bool = False,\n ) -> list[str]:\n \"\"\"Efficiently ingest multiple documents with embeddings into OpenSearch.\n\n This method uses bulk operations to insert documents with their vector\n embeddings and metadata into the specified OpenSearch index.\n\n Args:\n client: OpenSearch client instance\n index_name: Target index for document storage\n embeddings: List of vector embeddings for each document\n texts: List of document texts\n metadatas: Optional metadata dictionaries for each document\n ids: Optional document IDs (UUIDs generated if not provided)\n vector_field: Field name for storing vector embeddings\n text_field: Field name for 
storing document text\n mapping: Optional index mapping configuration\n max_chunk_bytes: Maximum size per bulk request chunk\n is_aoss: Whether using Amazon OpenSearch Serverless\n\n Returns:\n List of document IDs that were successfully ingested\n \"\"\"\n if not mapping:\n mapping = {}\n\n # Ensure the target index exists with the supplied KNN mapping before bulk\n # ingestion; otherwise OpenSearch auto-creates the index without a knn_vector\n # field and subsequent KNN queries fail.\n if mapping and not client.indices.exists(index=index_name):\n client.indices.create(index=index_name, body=mapping)\n\n requests = []\n return_ids = []\n\n for i, text in enumerate(texts):\n metadata = metadatas[i] if metadatas else {}\n _id = ids[i] if ids else str(uuid.uuid4())\n request = {\n \"_op_type\": \"index\",\n \"_index\": index_name,\n vector_field: embeddings[i],\n text_field: text,\n **metadata,\n }\n if is_aoss:\n request[\"id\"] = _id\n else:\n request[\"_id\"] = _id\n requests.append(request)\n return_ids.append(_id)\n if metadatas:\n self.log(f\"Sample metadata: {metadatas[0] if metadatas else {}}\")\n helpers.bulk(client, requests, max_chunk_bytes=max_chunk_bytes)\n return return_ids\n\n # ---------- auth / client ----------\n def _build_auth_kwargs(self) -> dict[str, Any]:\n \"\"\"Build authentication configuration for OpenSearch client.\n\n Constructs the appropriate authentication parameters based on the\n selected auth mode (basic username/password or JWT token).\n\n Returns:\n Dictionary containing authentication configuration\n\n Raises:\n ValueError: If required authentication parameters are missing\n \"\"\"\n mode = (self.auth_mode or \"basic\").strip().lower()\n if mode == \"jwt\":\n token = (self.jwt_token or \"\").strip()\n if not token:\n msg = \"Auth Mode is 'jwt' but no jwt_token was provided.\"\n raise ValueError(msg)\n header_name = (self.jwt_header or \"Authorization\").strip()\n header_value = f\"Bearer {token}\" if self.bearer_prefix else token\n return {\"headers\": {header_name: header_value}}\n user = (self.username or \"\").strip()\n pwd = (self.password or \"\").strip()\n if not user or not pwd:\n msg = \"Auth Mode is 'basic' but username/password are missing.\"\n raise ValueError(msg)\n return {\"http_auth\": (user, pwd)}\n\n def build_client(self) -> OpenSearch:\n \"\"\"Create and configure an OpenSearch client instance.\n\n Returns:\n Configured OpenSearch client ready for operations\n \"\"\"\n auth_kwargs = self._build_auth_kwargs()\n return OpenSearch(\n hosts=[self.opensearch_url],\n use_ssl=self.use_ssl,\n verify_certs=self.verify_certs,\n ssl_assert_hostname=False,\n ssl_show_warn=False,\n **auth_kwargs,\n )\n\n @check_cached_vector_store\n def build_vector_store(self) -> OpenSearch:\n # Return raw OpenSearch client as our “vector store.”\n self.log(self.ingest_data)\n client = self.build_client()\n self._add_documents_to_vector_store(client=client)\n return client\n\n # ---------- ingest ----------\n def _add_documents_to_vector_store(self, client: OpenSearch) -> None:\n \"\"\"Process and ingest documents into the OpenSearch vector store.\n\n This method handles the complete document ingestion pipeline:\n - Prepares document data and metadata\n - Generates vector embeddings\n - Creates appropriate index mappings\n - Bulk inserts documents with vectors\n\n Args:\n client: OpenSearch client for performing operations\n \"\"\"\n # Convert DataFrame to Data if needed using parent's method\n self.ingest_data = self._prepare_ingest_data()\n\n docs = self.ingest_data or []\n if not docs:\n self.log(\"No documents to ingest.\")\n return\n\n # Extract texts and metadata from documents\n texts = []\n metadatas = []\n # Process docs_metadata table input into a dict\n additional_metadata = {}\n if hasattr(self, \"docs_metadata\") and self.docs_metadata:\n logger.info(f\"[LF] Docs 
metadata {self.docs_metadata}\")\n if isinstance(self.docs_metadata[-1], Data):\n logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n self.docs_metadata = self.docs_metadata[-1].data\n logger.info(f\"[LF] Docs metadata is a Data object {self.docs_metadata}\")\n additional_metadata.update(self.docs_metadata)\n else:\n for item in self.docs_metadata:\n if isinstance(item, dict) and \"key\" in item and \"value\" in item:\n additional_metadata[item[\"key\"]] = item[\"value\"]\n # Replace string \"None\" values with actual None\n for key, value in additional_metadata.items():\n if value == \"None\":\n additional_metadata[key] = None\n logger.info(f\"[LF] Additional metadata {additional_metadata}\")\n for doc_obj in docs:\n data_copy = json.loads(doc_obj.model_dump_json())\n text = data_copy.pop(doc_obj.text_key, doc_obj.default_value)\n texts.append(text)\n\n # Merge additional metadata from table input\n data_copy.update(additional_metadata)\n\n metadatas.append(data_copy)\n self.log(metadatas)\n if not self.embedding:\n msg = \"Embedding handle is required to embed documents.\"\n raise ValueError(msg)\n\n # Generate embeddings\n vectors = self.embedding.embed_documents(texts)\n\n if not vectors:\n self.log(\"No vectors generated from documents.\")\n return\n\n # Get vector dimension for mapping\n dim = len(vectors[0]) if vectors else 768 # default fallback\n\n # Check for AOSS\n auth_kwargs = self._build_auth_kwargs()\n is_aoss = self._is_aoss_enabled(auth_kwargs.get(\"http_auth\"))\n\n # Validate engine with AOSS\n engine = getattr(self, \"engine\", \"jvector\")\n self._validate_aoss_with_engines(is_aoss=is_aoss, engine=engine)\n\n # Create mapping with proper KNN settings\n space_type = getattr(self, \"space_type\", \"l2\")\n ef_construction = getattr(self, \"ef_construction\", 512)\n m = getattr(self, \"m\", 16)\n\n mapping = self._default_text_mapping(\n dim=dim,\n engine=engine,\n space_type=space_type,\n ef_construction=ef_construction,\n m=m,\n vector_field=self.vector_field,\n )\n\n self.log(f\"Indexing {len(texts)} documents into '{self.index_name}' with proper KNN mapping...\")\n\n # Use the LangChain-style bulk ingestion\n return_ids = self._bulk_ingest_embeddings(\n client=client,\n index_name=self.index_name,\n embeddings=vectors,\n texts=texts,\n metadatas=metadatas,\n vector_field=self.vector_field,\n text_field=\"text\",\n mapping=mapping,\n is_aoss=is_aoss,\n )\n self.log(metadatas)\n\n self.log(f\"Successfully indexed {len(return_ids)} documents.\")\n\n # ---------- helpers for filters ----------\n def _is_placeholder_term(self, term_obj: dict) -> bool:\n # term_obj like {\"filename\": \"__IMPOSSIBLE_VALUE__\"}\n return any(v == \"__IMPOSSIBLE_VALUE__\" for v in term_obj.values())\n\n def _coerce_filter_clauses(self, filter_obj: dict | None) -> list[dict]:\n \"\"\"Convert filter expressions into OpenSearch-compatible filter clauses.\n\n This method accepts two filter formats and converts them to standardized\n OpenSearch query clauses:\n\n Format A - Explicit filters:\n {\"filter\": [{\"term\": {\"field\": \"value\"}}, {\"terms\": {\"field\": [\"val1\", \"val2\"]}}],\n \"limit\": 10, \"score_threshold\": 1.5}\n\n Format B - Context-style mapping:\n {\"data_sources\": [\"file1.pdf\"], \"document_types\": [\"pdf\"], \"owners\": [\"user1\"]}\n\n Args:\n filter_obj: Filter configuration dictionary or None\n\n Returns:\n List of OpenSearch filter clauses (term/terms objects)\n Placeholder values with \"__IMPOSSIBLE_VALUE__\" are ignored\n \"\"\"\n if not 
filter_obj:\n return []\n\n # If it is a string, try to parse it once\n if isinstance(filter_obj, str):\n try:\n filter_obj = json.loads(filter_obj)\n except json.JSONDecodeError:\n # Not valid JSON - treat as no filters\n return []\n\n # Case A: already an explicit list/dict under \"filter\"\n if \"filter\" in filter_obj:\n raw = filter_obj[\"filter\"]\n if isinstance(raw, dict):\n raw = [raw]\n explicit_clauses: list[dict] = []\n for f in raw or []:\n if \"term\" in f and isinstance(f[\"term\"], dict) and not self._is_placeholder_term(f[\"term\"]):\n explicit_clauses.append(f)\n elif \"terms\" in f and isinstance(f[\"terms\"], dict):\n field, vals = next(iter(f[\"terms\"].items()))\n if isinstance(vals, list) and len(vals) > 0:\n explicit_clauses.append(f)\n return explicit_clauses\n\n # Case B: convert context-style maps into clauses\n field_mapping = {\n \"data_sources\": \"filename\",\n \"document_types\": \"mimetype\",\n \"owners\": \"owner\",\n }\n context_clauses: list[dict] = []\n for k, values in filter_obj.items():\n if not isinstance(values, list):\n continue\n field = field_mapping.get(k, k)\n if len(values) == 0:\n # Match-nothing placeholder (kept to mirror your tool semantics)\n context_clauses.append({\"term\": {field: \"__IMPOSSIBLE_VALUE__\"}})\n elif len(values) == 1:\n if values[0] != \"__IMPOSSIBLE_VALUE__\":\n context_clauses.append({\"term\": {field: values[0]}})\n else:\n context_clauses.append({\"terms\": {field: values}})\n return context_clauses\n\n # ---------- search (single hybrid path matching your tool) ----------\n def search(self, query: str | None = None) -> list[dict[str, Any]]:\n \"\"\"Perform hybrid search combining vector similarity and keyword matching.\n\n This method executes a sophisticated search that combines:\n - K-nearest neighbor (KNN) vector similarity search (70% weight)\n - Multi-field keyword search with fuzzy matching (30% weight)\n - Optional filtering and score thresholds\n - Aggregations for faceted search results\n\n Args:\n query: Search query string (used for both vector embedding and keyword search)\n\n Returns:\n List of search results with page_content, metadata, and relevance scores\n\n Raises:\n ValueError: If embedding component is not provided or filter JSON is invalid\n \"\"\"\n logger.info(self.ingest_data)\n client = self.build_client()\n q = (query or \"\").strip()\n\n # Parse optional filter expression (can be either A or B shape; see _coerce_filter_clauses)\n filter_obj = None\n if getattr(self, \"filter_expression\", \"\") and self.filter_expression.strip():\n try:\n filter_obj = json.loads(self.filter_expression)\n except json.JSONDecodeError as e:\n msg = f\"Invalid filter_expression JSON: {e}\"\n raise ValueError(msg) from e\n\n if not self.embedding:\n msg = \"Embedding is required to run hybrid search (KNN + keyword).\"\n raise ValueError(msg)\n\n # Embed the query\n vec = self.embedding.embed_query(q)\n\n # Build filter clauses (accept both shapes)\n filter_clauses = self._coerce_filter_clauses(filter_obj)\n\n # Respect the tool's limit/threshold defaults\n limit = (filter_obj or {}).get(\"limit\", self.number_of_results)\n score_threshold = (filter_obj or {}).get(\"score_threshold\", 0)\n\n # Build the same hybrid body as your SearchService\n body = {\n \"query\": {\n \"bool\": {\n \"should\": [\n {\n \"knn\": {\n self.vector_field: {\n \"vector\": vec,\n \"k\": 10, # fixed to match the tool\n \"boost\": 0.7,\n }\n }\n },\n {\n \"multi_match\": {\n \"query\": q,\n \"fields\": [\"text^2\", \"filename^1.5\"],\n 
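# text^2 and filename^1.5 apply standard OpenSearch per-field boosts to the keyword leg;\n # combined with the knn 0.7 / multi_match 0.3 boosts, vector similarity dominates scoring.\n 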
\"type\": \"best_fields\",\n \"fuzziness\": \"AUTO\",\n \"boost\": 0.3,\n }\n },\n ],\n \"minimum_should_match\": 1,\n }\n },\n \"aggs\": {\n \"data_sources\": {\"terms\": {\"field\": \"filename\", \"size\": 20}},\n \"document_types\": {\"terms\": {\"field\": \"mimetype\", \"size\": 10}},\n \"owners\": {\"terms\": {\"field\": \"owner\", \"size\": 10}},\n },\n \"_source\": [\n \"filename\",\n \"mimetype\",\n \"page\",\n \"text\",\n \"source_url\",\n \"owner\",\n \"allowed_users\",\n \"allowed_groups\",\n ],\n \"size\": limit,\n }\n if filter_clauses:\n body[\"query\"][\"bool\"][\"filter\"] = filter_clauses\n\n if isinstance(score_threshold, (int, float)) and score_threshold > 0:\n # top-level min_score (matches your tool)\n body[\"min_score\"] = score_threshold\n\n resp = client.search(index=self.index_name, body=body)\n hits = resp.get(\"hits\", {}).get(\"hits\", [])\n return [\n {\n \"page_content\": hit[\"_source\"].get(\"text\", \"\"),\n \"metadata\": {k: v for k, v in hit[\"_source\"].items() if k != \"text\"},\n \"score\": hit.get(\"_score\"),\n }\n for hit in hits\n ]\n\n def search_documents(self) -> list[Data]:\n \"\"\"Search documents and return results as Data objects.\n\n This is the main interface method that performs the search using the\n configured search_query and returns results in Langflow's Data format.\n\n Returns:\n List of Data objects containing search results with text and metadata\n\n Raises:\n Exception: If search operation fails\n \"\"\"\n try:\n raw = self.search(self.search_query or \"\")\n return [Data(text=hit[\"page_content\"], **hit[\"metadata\"]) for hit in raw]\n self.log(self.ingest_data)\n except Exception as e:\n self.log(f\"search_documents error: {e}\")\n raise\n\n # -------- dynamic UI handling (auth switch) --------\n async def update_build_config(self, build_config: dict, field_value: str, field_name: str | None = None) -> dict:\n \"\"\"Dynamically update component configuration based on field changes.\n\n This method handles real-time UI updates, particularly for authentication\n mode changes that show/hide relevant input fields.\n\n Args:\n build_config: Current component configuration\n field_value: New value for the changed field\n field_name: Name of the field that changed\n\n Returns:\n Updated build configuration with appropriate field visibility\n \"\"\"\n try:\n if field_name == \"auth_mode\":\n mode = (field_value or \"basic\").strip().lower()\n is_basic = mode == \"basic\"\n is_jwt = mode == \"jwt\"\n\n build_config[\"username\"][\"show\"] = is_basic\n build_config[\"password\"][\"show\"] = is_basic\n\n build_config[\"jwt_token\"][\"show\"] = is_jwt\n build_config[\"jwt_header\"][\"show\"] = is_jwt\n build_config[\"bearer_prefix\"][\"show\"] = is_jwt\n\n build_config[\"username\"][\"required\"] = is_basic\n build_config[\"password\"][\"required\"] = is_basic\n\n build_config[\"jwt_token\"][\"required\"] = is_jwt\n build_config[\"jwt_header\"][\"required\"] = is_jwt\n build_config[\"bearer_prefix\"][\"required\"] = False\n\n if is_basic:\n build_config[\"jwt_token\"][\"value\"] = \"\"\n\n return build_config\n\n except (KeyError, ValueError) as e:\n self.log(f\"update_build_config error: {e}\")\n\n return build_config\n" }, "docs_metadata": { "_input_type": "TableInput", - "advanced": true, + "advanced": false, "display_name": "Document Metadata", "dynamic": false, "info": "Additional metadata key-value pairs to be added to all ingested documents. 
Useful for tagging documents with source information, categories, or other custom attributes.", + "input_types": [ + "Data" + ], "is_list": true, "list_add_label": "Add More", "name": "docs_metadata", @@ -884,12 +889,14 @@ { "description": "Key name", "display_name": "Key", + "formatter": "text", "name": "key", "type": "str" }, { "description": "Value of the metadata", "display_name": "Value", + "formatter": "text", "name": "value", "type": "str" } @@ -1058,7 +1065,7 @@ "dynamic": false, "info": "Valid JSON Web Token for authentication. Will be sent in the Authorization header (with optional 'Bearer ' prefix).", "input_types": [], - "load_from_db": true, + "load_from_db": false, "name": "jwt_token", "password": true, "placeholder": "", @@ -1066,7 +1073,7 @@ "show": true, "title_case": false, "type": "str", - "value": "JWT" + "value": "" }, "m": { "_input_type": "IntInput", @@ -1294,7 +1301,7 @@ "dragging": false, "id": "OpenSearch-iYfjf", "measured": { - "height": 737, + "height": 822, "width": 320 }, "position": { @@ -1332,6 +1339,7 @@ ], "frozen": false, "icon": "binary", + "last_updated": "2025-09-29T18:47:49.199Z", "legacy": false, "metadata": { "code_hash": "8607e963fdef", @@ -2013,6 +2021,7 @@ ], "frozen": false, "icon": "brain-circuit", + "last_updated": "2025-09-29T18:47:49.200Z", "legacy": false, "metadata": { "code_hash": "bb5f8714781b", @@ -2319,9 +2328,9 @@ } ], "viewport": { - "x": -195.10601766576656, - "y": 113.4638443501205, - "zoom": 0.5524404935324336 + "x": -469.5361909817717, + "y": 81.51136729649903, + "zoom": 0.8111960289133995 } }, "description": "OpenRAG Open Search Nudges generator, based on the Open Search documents and the chat history.",
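The two Search Filters (JSON) shapes accepted by _coerce_filter_clauses above can be sketched end to end. A minimal example, assuming the field mapping and placeholder semantics shown in the component code; the expected_* clause lists are hand-derived from those conversion rules, not output captured from a live component:

import json

# Format A: explicit clauses pass through unchanged; term clauses whose value is
# "__IMPOSSIBLE_VALUE__" and empty terms lists are dropped.
expr_a = '{"filter": [{"term": {"filename": "doc.pdf"}}, {"terms": {"owner": ["user1", "user2"]}}], "limit": 10, "score_threshold": 1.6}'
expected_a = [
    {"term": {"filename": "doc.pdf"}},
    {"terms": {"owner": ["user1", "user2"]}},
]

# Format B: context-style keys map to index fields (data_sources -> filename,
# document_types -> mimetype, owners -> owner); one value becomes a term clause,
# several become a terms clause, and an empty list becomes the match-nothing
# placeholder term.
expr_b = '{"data_sources": ["file.pdf"], "owners": ["user1", "user2"], "document_types": []}'
expected_b = [
    {"term": {"filename": "file.pdf"}},
    {"terms": {"owner": ["user1", "user2"]}},
    {"term": {"mimetype": "__IMPOSSIBLE_VALUE__"}},
]

# limit and score_threshold ride alongside the filters in Format A.
assert json.loads(expr_a)["limit"] == 10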